diff --git a/config/identical-files.json b/config/identical-files.json index 35c9169df56b..71c507e3993a 100644 --- a/config/identical-files.json +++ b/config/identical-files.json @@ -1,5 +1,5 @@ { - "DataFlow Java/C++/C#": [ + "DataFlow Java/C++/C#/Python": [ "java/ql/src/semmle/code/java/dataflow/internal/DataFlowImpl.qll", "java/ql/src/semmle/code/java/dataflow/internal/DataFlowImpl2.qll", "java/ql/src/semmle/code/java/dataflow/internal/DataFlowImpl3.qll", @@ -18,15 +18,18 @@ "csharp/ql/src/semmle/code/csharp/dataflow/internal/DataFlowImpl2.qll", "csharp/ql/src/semmle/code/csharp/dataflow/internal/DataFlowImpl3.qll", "csharp/ql/src/semmle/code/csharp/dataflow/internal/DataFlowImpl4.qll", - "csharp/ql/src/semmle/code/csharp/dataflow/internal/DataFlowImpl5.qll" + "csharp/ql/src/semmle/code/csharp/dataflow/internal/DataFlowImpl5.qll", + "python/ql/src/experimental/dataflow/internal/DataFlowImpl.qll", + "python/ql/src/experimental/dataflow/internal/DataFlowImpl2.qll" ], - "DataFlow Java/C++/C# Common": [ + "DataFlow Java/C++/C#/Python Common": [ "java/ql/src/semmle/code/java/dataflow/internal/DataFlowImplCommon.qll", "cpp/ql/src/semmle/code/cpp/dataflow/internal/DataFlowImplCommon.qll", "cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowImplCommon.qll", - "csharp/ql/src/semmle/code/csharp/dataflow/internal/DataFlowImplCommon.qll" + "csharp/ql/src/semmle/code/csharp/dataflow/internal/DataFlowImplCommon.qll", + "python/ql/src/experimental/dataflow/internal/DataFlowImplCommon.qll" ], - "TaintTracking::Configuration Java/C++/C#": [ + "TaintTracking::Configuration Java/C++/C#/Python": [ "cpp/ql/src/semmle/code/cpp/dataflow/internal/tainttracking1/TaintTrackingImpl.qll", "cpp/ql/src/semmle/code/cpp/dataflow/internal/tainttracking2/TaintTrackingImpl.qll", "cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/tainttracking1/TaintTrackingImpl.qll", @@ -37,13 +40,15 @@ "csharp/ql/src/semmle/code/csharp/dataflow/internal/tainttracking4/TaintTrackingImpl.qll", "csharp/ql/src/semmle/code/csharp/dataflow/internal/tainttracking5/TaintTrackingImpl.qll", "java/ql/src/semmle/code/java/dataflow/internal/tainttracking1/TaintTrackingImpl.qll", - "java/ql/src/semmle/code/java/dataflow/internal/tainttracking2/TaintTrackingImpl.qll" + "java/ql/src/semmle/code/java/dataflow/internal/tainttracking2/TaintTrackingImpl.qll", + "python/ql/src/experimental/dataflow/internal/tainttracking1/TaintTrackingImpl.qll" ], - "DataFlow Java/C++/C# Consistency checks": [ + "DataFlow Java/C++/C#/Python Consistency checks": [ "java/ql/src/semmle/code/java/dataflow/internal/DataFlowImplConsistency.qll", "cpp/ql/src/semmle/code/cpp/dataflow/internal/DataFlowImplConsistency.qll", "cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowImplConsistency.qll", - "csharp/ql/src/semmle/code/csharp/dataflow/internal/DataFlowImplConsistency.qll" + "csharp/ql/src/semmle/code/csharp/dataflow/internal/DataFlowImplConsistency.qll", + "python/ql/src/experimental/dataflow/internal/DataFlowImplConsistency.qll" ], "C++ SubBasicBlocks": [ "cpp/ql/src/semmle/code/cpp/controlflow/SubBasicBlocks.qll", diff --git a/python/ql/src/experimental/dataflow/DataFlow.qll b/python/ql/src/experimental/dataflow/DataFlow.qll new file mode 100644 index 000000000000..7f1c2c0c4e58 --- /dev/null +++ b/python/ql/src/experimental/dataflow/DataFlow.qll @@ -0,0 +1,26 @@ +/** + * Provides a library for local (intra-procedural) and global (inter-procedural) + * data flow analysis: deciding whether data can flow from a _source_ to a + * _sink_. + * + * Unless configured otherwise, _flow_ means that the exact value of + * the source may reach the sink. We do not track flow across pointer + * dereferences or array indexing. To track these types of flow, where the + * exact value may not be preserved, import + * `experimental.dataflow.TaintTracking`. + * + * To use global (interprocedural) data flow, extend the class + * `DataFlow::Configuration` as documented on that class. To use local + * (intraprocedural) data flow, call `DataFlow::localFlow` or + * `DataFlow::localFlowStep` with arguments of type `DataFlow::Node`. + */ + +import python + +/** + * Provides classes for performing local (intra-procedural) and + * global (inter-procedural) data flow analyses. + */ +module DataFlow { + import experimental.dataflow.internal.DataFlowImpl +} diff --git a/python/ql/src/experimental/dataflow/DataFlow2.qll b/python/ql/src/experimental/dataflow/DataFlow2.qll new file mode 100644 index 000000000000..cb4d8b60d1e6 --- /dev/null +++ b/python/ql/src/experimental/dataflow/DataFlow2.qll @@ -0,0 +1,26 @@ +/** + * Provides a library for local (intra-procedural) and global (inter-procedural) + * data flow analysis: deciding whether data can flow from a _source_ to a + * _sink_. + * + * Unless configured otherwise, _flow_ means that the exact value of + * the source may reach the sink. We do not track flow across pointer + * dereferences or array indexing. To track these types of flow, where the + * exact value may not be preserved, import + * `experimental.dataflow.TaintTracking`. + * + * To use global (interprocedural) data flow, extend the class + * `DataFlow::Configuration` as documented on that class. To use local + * (intraprocedural) data flow, call `DataFlow::localFlow` or + * `DataFlow::localFlowStep` with arguments of type `DataFlow::Node`. + */ + +import python + +/** + * Provides classes for performing local (intra-procedural) and + * global (inter-procedural) data flow analyses. + */ +module DataFlow2 { + import experimental.dataflow.internal.DataFlowImpl2 +} diff --git a/python/ql/src/experimental/dataflow/TaintTracking.qll b/python/ql/src/experimental/dataflow/TaintTracking.qll new file mode 100644 index 000000000000..b6c14f2d7765 --- /dev/null +++ b/python/ql/src/experimental/dataflow/TaintTracking.qll @@ -0,0 +1,19 @@ +/** + * Provides classes for performing local (intra-procedural) and + * global (inter-procedural) taint-tracking analyses. + * + * To use global (interprocedural) taint tracking, extend the class + * `TaintTracking::Configuration` as documented on that class. To use local + * (intraprocedural) taint tracking, call `TaintTracking::localTaint` or + * `TaintTracking::localTaintStep` with arguments of type `DataFlow::Node`. + */ + +import python + +/** + * Provides classes for performing local (intra-procedural) and + * global (inter-procedural) taint-tracking analyses. + */ +module TaintTracking { + import experimental.dataflow.internal.tainttracking1.TaintTrackingImpl +} \ No newline at end of file diff --git a/python/ql/src/experimental/dataflow/internal/DataFlowImpl.qll b/python/ql/src/experimental/dataflow/internal/DataFlowImpl.qll new file mode 100644 index 000000000000..5042dce683f6 --- /dev/null +++ b/python/ql/src/experimental/dataflow/internal/DataFlowImpl.qll @@ -0,0 +1,2985 @@ +/** + * Provides an implementation of global (interprocedural) data flow. This file + * re-exports the local (intraprocedural) data flow analysis from + * `DataFlowImplSpecific::Public` and adds a global analysis, mainly exposed + * through the `Configuration` class. This file exists in several identical + * copies, allowing queries to use multiple `Configuration` classes that depend + * on each other without introducing mutual recursion among those configurations. + */ + +private import DataFlowImplCommon +private import DataFlowImplSpecific::Private +import DataFlowImplSpecific::Public + +/** + * A configuration of interprocedural data flow analysis. This defines + * sources, sinks, and any other configurable aspect of the analysis. Each + * use of the global data flow library must define its own unique extension + * of this abstract class. To create a configuration, extend this class with + * a subclass whose characteristic predicate is a unique singleton string. + * For example, write + * + * ```ql + * class MyAnalysisConfiguration extends DataFlow::Configuration { + * MyAnalysisConfiguration() { this = "MyAnalysisConfiguration" } + * // Override `isSource` and `isSink`. + * // Optionally override `isBarrier`. + * // Optionally override `isAdditionalFlowStep`. + * } + * ``` + * Conceptually, this defines a graph where the nodes are `DataFlow::Node`s and + * the edges are those data-flow steps that preserve the value of the node + * along with any additional edges defined by `isAdditionalFlowStep`. + * Specifying nodes in `isBarrier` will remove those nodes from the graph, and + * specifying nodes in `isBarrierIn` and/or `isBarrierOut` will remove in-going + * and/or out-going edges from those nodes, respectively. + * + * Then, to query whether there is flow between some `source` and `sink`, + * write + * + * ```ql + * exists(MyAnalysisConfiguration cfg | cfg.hasFlow(source, sink)) + * ``` + * + * Multiple configurations can coexist, but two classes extending + * `DataFlow::Configuration` should never depend on each other. One of them + * should instead depend on a `DataFlow2::Configuration`, a + * `DataFlow3::Configuration`, or a `DataFlow4::Configuration`. + */ +abstract class Configuration extends string { + bindingset[this] + Configuration() { any() } + + /** + * Holds if `source` is a relevant data flow source. + */ + abstract predicate isSource(Node source); + + /** + * Holds if `sink` is a relevant data flow sink. + */ + abstract predicate isSink(Node sink); + + /** + * Holds if data flow through `node` is prohibited. This completely removes + * `node` from the data flow graph. + */ + predicate isBarrier(Node node) { none() } + + /** Holds if data flow into `node` is prohibited. */ + predicate isBarrierIn(Node node) { none() } + + /** Holds if data flow out of `node` is prohibited. */ + predicate isBarrierOut(Node node) { none() } + + /** Holds if data flow through nodes guarded by `guard` is prohibited. */ + predicate isBarrierGuard(BarrierGuard guard) { none() } + + /** + * Holds if the additional flow step from `node1` to `node2` must be taken + * into account in the analysis. + */ + predicate isAdditionalFlowStep(Node node1, Node node2) { none() } + + /** + * Gets the virtual dispatch branching limit when calculating field flow. + * This can be overridden to a smaller value to improve performance (a + * value of 0 disables field flow), or a larger value to get more results. + */ + int fieldFlowBranchLimit() { result = 2 } + + /** + * Holds if data may flow from `source` to `sink` for this configuration. + */ + predicate hasFlow(Node source, Node sink) { flowsTo(source, sink, this) } + + /** + * Holds if data may flow from `source` to `sink` for this configuration. + * + * The corresponding paths are generated from the end-points and the graph + * included in the module `PathGraph`. + */ + predicate hasFlowPath(PathNode source, PathNode sink) { flowsTo(source, sink, _, _, this) } + + /** + * Holds if data may flow from some source to `sink` for this configuration. + */ + predicate hasFlowTo(Node sink) { hasFlow(_, sink) } + + /** + * Holds if data may flow from some source to `sink` for this configuration. + */ + predicate hasFlowToExpr(DataFlowExpr sink) { hasFlowTo(exprNode(sink)) } + + /** + * Gets the exploration limit for `hasPartialFlow` measured in approximate + * number of interprocedural steps. + */ + int explorationLimit() { none() } + + /** + * Holds if there is a partial data flow path from `source` to `node`. The + * approximate distance between `node` and the closest source is `dist` and + * is restricted to be less than or equal to `explorationLimit()`. This + * predicate completely disregards sink definitions. + * + * This predicate is intended for dataflow exploration and debugging and may + * perform poorly if the number of sources is too big and/or the exploration + * limit is set too high without using barriers. + * + * This predicate is disabled (has no results) by default. Override + * `explorationLimit()` with a suitable number to enable this predicate. + * + * To use this in a `path-problem` query, import the module `PartialPathGraph`. + */ + final predicate hasPartialFlow(PartialPathNode source, PartialPathNode node, int dist) { + partialFlow(source, node, this) and + dist = node.getSourceDistance() + } +} + +/** + * This class exists to prevent mutual recursion between the user-overridden + * member predicates of `Configuration` and the rest of the data-flow library. + * Good performance cannot be guaranteed in the presence of such recursion, so + * it should be replaced by using more than one copy of the data flow library. + */ +abstract private class ConfigurationRecursionPrevention extends Configuration { + bindingset[this] + ConfigurationRecursionPrevention() { any() } + + override predicate hasFlow(Node source, Node sink) { + strictcount(Node n | this.isSource(n)) < 0 + or + strictcount(Node n | this.isSink(n)) < 0 + or + strictcount(Node n1, Node n2 | this.isAdditionalFlowStep(n1, n2)) < 0 + or + super.hasFlow(source, sink) + } +} + +private predicate inBarrier(Node node, Configuration config) { + config.isBarrierIn(node) and + config.isSource(node) +} + +private predicate outBarrier(Node node, Configuration config) { + config.isBarrierOut(node) and + config.isSink(node) +} + +private predicate fullBarrier(Node node, Configuration config) { + config.isBarrier(node) + or + config.isBarrierIn(node) and + not config.isSource(node) + or + config.isBarrierOut(node) and + not config.isSink(node) + or + exists(BarrierGuard g | + config.isBarrierGuard(g) and + node = g.getAGuardedNode() + ) +} + +private class AdditionalFlowStepSource extends Node { + AdditionalFlowStepSource() { any(Configuration c).isAdditionalFlowStep(this, _) } +} + +pragma[noinline] +private predicate isAdditionalFlowStep( + AdditionalFlowStepSource node1, Node node2, DataFlowCallable callable1, Configuration config +) { + config.isAdditionalFlowStep(node1, node2) and + callable1 = node1.getEnclosingCallable() +} + +/** + * Holds if data can flow in one local step from `node1` to `node2`. + */ +private predicate localFlowStep(Node node1, Node node2, Configuration config) { + simpleLocalFlowStep(node1, node2) and + not outBarrier(node1, config) and + not inBarrier(node2, config) and + not fullBarrier(node1, config) and + not fullBarrier(node2, config) +} + +/** + * Holds if the additional step from `node1` to `node2` does not jump between callables. + */ +private predicate additionalLocalFlowStep(Node node1, Node node2, Configuration config) { + isAdditionalFlowStep(node1, node2, node2.getEnclosingCallable(), config) and + not outBarrier(node1, config) and + not inBarrier(node2, config) and + not fullBarrier(node1, config) and + not fullBarrier(node2, config) +} + +/** + * Holds if data can flow from `node1` to `node2` in a way that discards call contexts. + */ +private predicate jumpStep(Node node1, Node node2, Configuration config) { + jumpStep(node1, node2) and + not outBarrier(node1, config) and + not inBarrier(node2, config) and + not fullBarrier(node1, config) and + not fullBarrier(node2, config) +} + +/** + * Holds if the additional step from `node1` to `node2` jumps between callables. + */ +private predicate additionalJumpStep(Node node1, Node node2, Configuration config) { + exists(DataFlowCallable callable1 | + isAdditionalFlowStep(node1, node2, callable1, config) and + node2.getEnclosingCallable() != callable1 and + not outBarrier(node1, config) and + not inBarrier(node2, config) and + not fullBarrier(node1, config) and + not fullBarrier(node2, config) + ) +} + +/** + * Holds if field flow should be used for the given configuration. + */ +private predicate useFieldFlow(Configuration config) { config.fieldFlowBranchLimit() >= 1 } + +/** + * Holds if `node` is reachable from a source in the configuration `config`. + * + * The Boolean `fromArg` records whether the node is reached through an + * argument in a call. + */ +private predicate nodeCandFwd1(Node node, boolean fromArg, Configuration config) { + not fullBarrier(node, config) and + ( + config.isSource(node) and + fromArg = false + or + exists(Node mid | + nodeCandFwd1(mid, fromArg, config) and + localFlowStep(mid, node, config) + ) + or + exists(Node mid | + nodeCandFwd1(mid, fromArg, config) and + additionalLocalFlowStep(mid, node, config) + ) + or + exists(Node mid | + nodeCandFwd1(mid, config) and + jumpStep(mid, node, config) and + fromArg = false + ) + or + exists(Node mid | + nodeCandFwd1(mid, config) and + additionalJumpStep(mid, node, config) and + fromArg = false + ) + or + // store + exists(Node mid | + useFieldFlow(config) and + nodeCandFwd1(mid, fromArg, config) and + store(mid, _, node, _) and + not outBarrier(mid, config) + ) + or + // read + exists(Content c | + nodeCandFwd1Read(c, node, fromArg, config) and + nodeCandFwd1IsStored(c, config) and + not inBarrier(node, config) + ) + or + // flow into a callable + exists(Node arg | + nodeCandFwd1(arg, config) and + viableParamArg(_, node, arg) and + fromArg = true + ) + or + // flow out of a callable + exists(DataFlowCall call | + nodeCandFwd1Out(call, node, false, config) and + fromArg = false + or + nodeCandFwd1OutFromArg(call, node, config) and + nodeCandFwd1IsEntered(call, fromArg, config) + ) + ) +} + +private predicate nodeCandFwd1(Node node, Configuration config) { nodeCandFwd1(node, _, config) } + +pragma[nomagic] +private predicate nodeCandFwd1Read(Content c, Node node, boolean fromArg, Configuration config) { + exists(Node mid | + nodeCandFwd1(mid, fromArg, config) and + read(mid, c, node) + ) +} + +/** + * Holds if `c` is the target of a store in the flow covered by `nodeCandFwd1`. + */ +pragma[nomagic] +private predicate nodeCandFwd1IsStored(Content c, Configuration config) { + exists(Node mid, Node node, TypedContent tc | + not fullBarrier(node, config) and + useFieldFlow(config) and + nodeCandFwd1(mid, config) and + store(mid, tc, node, _) and + c = tc.getContent() + ) +} + +pragma[nomagic] +private predicate nodeCandFwd1ReturnPosition( + ReturnPosition pos, boolean fromArg, Configuration config +) { + exists(ReturnNodeExt ret | + nodeCandFwd1(ret, fromArg, config) and + getReturnPosition(ret) = pos + ) +} + +pragma[nomagic] +private predicate nodeCandFwd1Out(DataFlowCall call, Node out, boolean fromArg, Configuration config) { + exists(ReturnPosition pos | + nodeCandFwd1ReturnPosition(pos, fromArg, config) and + viableReturnPosOut(call, pos, out) + ) +} + +pragma[nomagic] +private predicate nodeCandFwd1OutFromArg(DataFlowCall call, Node node, Configuration config) { + nodeCandFwd1Out(call, node, true, config) +} + +/** + * Holds if an argument to `call` is reached in the flow covered by `nodeCandFwd1`. + */ +pragma[nomagic] +private predicate nodeCandFwd1IsEntered(DataFlowCall call, boolean fromArg, Configuration config) { + exists(ArgumentNode arg | + nodeCandFwd1(arg, fromArg, config) and + viableParamArg(call, _, arg) + ) +} + +bindingset[result, b] +private boolean unbindBool(boolean b) { result != b.booleanNot() } + +/** + * Holds if `node` is part of a path from a source to a sink in the + * configuration `config`. + * + * The Boolean `toReturn` records whether the node must be returned from + * the enclosing callable in order to reach a sink. + */ +pragma[nomagic] +private predicate nodeCand1(Node node, boolean toReturn, Configuration config) { + nodeCand1_0(node, toReturn, config) and + nodeCandFwd1(node, config) +} + +pragma[nomagic] +private predicate nodeCand1_0(Node node, boolean toReturn, Configuration config) { + nodeCandFwd1(node, config) and + config.isSink(node) and + toReturn = false + or + exists(Node mid | + localFlowStep(node, mid, config) and + nodeCand1(mid, toReturn, config) + ) + or + exists(Node mid | + additionalLocalFlowStep(node, mid, config) and + nodeCand1(mid, toReturn, config) + ) + or + exists(Node mid | + jumpStep(node, mid, config) and + nodeCand1(mid, _, config) and + toReturn = false + ) + or + exists(Node mid | + additionalJumpStep(node, mid, config) and + nodeCand1(mid, _, config) and + toReturn = false + ) + or + // store + exists(Content c | + nodeCand1Store(c, node, toReturn, config) and + nodeCand1IsRead(c, config) + ) + or + // read + exists(Node mid, Content c | + read(node, c, mid) and + nodeCandFwd1IsStored(c, unbind(config)) and + nodeCand1(mid, toReturn, config) + ) + or + // flow into a callable + exists(DataFlowCall call | + nodeCand1In(call, node, false, config) and + toReturn = false + or + nodeCand1InToReturn(call, node, config) and + nodeCand1IsReturned(call, toReturn, config) + ) + or + // flow out of a callable + exists(ReturnPosition pos | + nodeCand1Out(pos, config) and + getReturnPosition(node) = pos and + toReturn = true + ) +} + +/** + * Holds if `c` is the target of a read in the flow covered by `nodeCand1`. + */ +pragma[nomagic] +private predicate nodeCand1IsRead(Content c, Configuration config) { + exists(Node mid, Node node | + useFieldFlow(config) and + nodeCandFwd1(node, unbind(config)) and + read(node, c, mid) and + nodeCandFwd1IsStored(c, unbind(config)) and + nodeCand1(mid, _, config) + ) +} + +pragma[nomagic] +private predicate nodeCand1Store(Content c, Node node, boolean toReturn, Configuration config) { + exists(Node mid, TypedContent tc | + nodeCand1(mid, toReturn, config) and + nodeCandFwd1IsStored(c, unbind(config)) and + store(node, tc, mid, _) and + c = tc.getContent() + ) +} + +/** + * Holds if `c` is the target of both a read and a store in the flow covered + * by `nodeCand1`. + */ +private predicate nodeCand1IsReadAndStored(Content c, Configuration conf) { + nodeCand1IsRead(c, conf) and + nodeCand1Store(c, _, _, conf) +} + +pragma[nomagic] +private predicate viableReturnPosOutNodeCandFwd1( + DataFlowCall call, ReturnPosition pos, Node out, Configuration config +) { + nodeCandFwd1ReturnPosition(pos, _, config) and + viableReturnPosOut(call, pos, out) +} + +pragma[nomagic] +private predicate nodeCand1Out(ReturnPosition pos, Configuration config) { + exists(DataFlowCall call, Node out | + nodeCand1(out, _, config) and + viableReturnPosOutNodeCandFwd1(call, pos, out, config) + ) +} + +pragma[nomagic] +private predicate viableParamArgNodeCandFwd1( + DataFlowCall call, ParameterNode p, ArgumentNode arg, Configuration config +) { + viableParamArg(call, p, arg) and + nodeCandFwd1(arg, config) +} + +pragma[nomagic] +private predicate nodeCand1In( + DataFlowCall call, ArgumentNode arg, boolean toReturn, Configuration config +) { + exists(ParameterNode p | + nodeCand1(p, toReturn, config) and + viableParamArgNodeCandFwd1(call, p, arg, config) + ) +} + +pragma[nomagic] +private predicate nodeCand1InToReturn(DataFlowCall call, ArgumentNode arg, Configuration config) { + nodeCand1In(call, arg, true, config) +} + +/** + * Holds if an output from `call` is reached in the flow covered by `nodeCand1`. + */ +pragma[nomagic] +private predicate nodeCand1IsReturned(DataFlowCall call, boolean toReturn, Configuration config) { + exists(Node out | + nodeCand1(out, toReturn, config) and + nodeCandFwd1OutFromArg(call, out, config) + ) +} + +pragma[nomagic] +private predicate nodeCand1(Node node, Configuration config) { nodeCand1(node, _, config) } + +private predicate throughFlowNodeCand1(Node node, Configuration config) { + nodeCand1(node, true, config) and + not fullBarrier(node, config) and + not inBarrier(node, config) and + not outBarrier(node, config) +} + +/** Holds if flow may return from `callable`. */ +pragma[nomagic] +private predicate returnFlowCallableNodeCand1( + DataFlowCallable callable, ReturnKindExt kind, Configuration config +) { + exists(ReturnNodeExt ret | + throughFlowNodeCand1(ret, config) and + callable = ret.getEnclosingCallable() and + kind = ret.getKind() + ) +} + +/** + * Holds if flow may enter through `p` and reach a return node making `p` a + * candidate for the origin of a summary. + */ +private predicate parameterThroughFlowNodeCand1(ParameterNode p, Configuration config) { + exists(ReturnKindExt kind | + throughFlowNodeCand1(p, config) and + returnFlowCallableNodeCand1(p.getEnclosingCallable(), kind, config) and + // we don't expect a parameter to return stored in itself + not exists(int pos | + kind.(ParamUpdateReturnKind).getPosition() = pos and p.isParameterOf(_, pos) + ) + ) +} + +pragma[nomagic] +private predicate storeCand1(Node n1, Content c, Node n2, Configuration config) { + exists(TypedContent tc | + nodeCand1IsReadAndStored(c, config) and + nodeCand1(n2, unbind(config)) and + store(n1, tc, n2, _) and + c = tc.getContent() + ) +} + +pragma[nomagic] +private predicate read(Node n1, Content c, Node n2, Configuration config) { + nodeCand1IsReadAndStored(c, config) and + nodeCand1(n2, unbind(config)) and + read(n1, c, n2) +} + +pragma[noinline] +private predicate localFlowStepNodeCand1(Node node1, Node node2, Configuration config) { + nodeCand1(node1, config) and + localFlowStep(node1, node2, config) +} + +pragma[noinline] +private predicate additionalLocalFlowStepNodeCand1(Node node1, Node node2, Configuration config) { + nodeCand1(node1, config) and + additionalLocalFlowStep(node1, node2, config) +} + +pragma[nomagic] +private predicate viableReturnPosOutNodeCand1( + DataFlowCall call, ReturnPosition pos, Node out, Configuration config +) { + nodeCand1(out, _, config) and + viableReturnPosOutNodeCandFwd1(call, pos, out, config) +} + +/** + * Holds if data can flow out of `call` from `ret` to `out`, either + * through a `ReturnNode` or through an argument that has been mutated, and + * that this step is part of a path from a source to a sink. + */ +pragma[nomagic] +private predicate flowOutOfCallNodeCand1( + DataFlowCall call, ReturnNodeExt ret, Node out, Configuration config +) { + viableReturnPosOutNodeCand1(call, getReturnPosition(ret), out, config) and + nodeCand1(ret, config) and + not outBarrier(ret, config) and + not inBarrier(out, config) +} + +pragma[nomagic] +private predicate viableParamArgNodeCand1( + DataFlowCall call, ParameterNode p, ArgumentNode arg, Configuration config +) { + viableParamArgNodeCandFwd1(call, p, arg, config) and + nodeCand1(arg, config) +} + +/** + * Holds if data can flow into `call` and that this step is part of a + * path from a source to a sink. + */ +pragma[nomagic] +private predicate flowIntoCallNodeCand1( + DataFlowCall call, ArgumentNode arg, ParameterNode p, Configuration config +) { + viableParamArgNodeCand1(call, p, arg, config) and + nodeCand1(p, config) and + not outBarrier(arg, config) and + not inBarrier(p, config) +} + +/** + * Gets the amount of forward branching on the origin of a cross-call path + * edge in the graph of paths between sources and sinks that ignores call + * contexts. + */ +private int branch(Node n1, Configuration conf) { + result = + strictcount(Node n | + flowOutOfCallNodeCand1(_, n1, n, conf) or flowIntoCallNodeCand1(_, n1, n, conf) + ) +} + +/** + * Gets the amount of backward branching on the target of a cross-call path + * edge in the graph of paths between sources and sinks that ignores call + * contexts. + */ +private int join(Node n2, Configuration conf) { + result = + strictcount(Node n | + flowOutOfCallNodeCand1(_, n, n2, conf) or flowIntoCallNodeCand1(_, n, n2, conf) + ) +} + +/** + * Holds if data can flow out of `call` from `ret` to `out`, either + * through a `ReturnNode` or through an argument that has been mutated, and + * that this step is part of a path from a source to a sink. The + * `allowsFieldFlow` flag indicates whether the branching is within the limit + * specified by the configuration. + */ +pragma[nomagic] +private predicate flowOutOfCallNodeCand1( + DataFlowCall call, ReturnNodeExt ret, Node out, boolean allowsFieldFlow, Configuration config +) { + flowOutOfCallNodeCand1(call, ret, out, config) and + exists(int b, int j | + b = branch(ret, config) and + j = join(out, config) and + if b.minimum(j) <= config.fieldFlowBranchLimit() + then allowsFieldFlow = true + else allowsFieldFlow = false + ) +} + +/** + * Holds if data can flow into `call` and that this step is part of a + * path from a source to a sink. The `allowsFieldFlow` flag indicates whether + * the branching is within the limit specified by the configuration. + */ +pragma[nomagic] +private predicate flowIntoCallNodeCand1( + DataFlowCall call, ArgumentNode arg, ParameterNode p, boolean allowsFieldFlow, + Configuration config +) { + flowIntoCallNodeCand1(call, arg, p, config) and + exists(int b, int j | + b = branch(arg, config) and + j = join(p, config) and + if b.minimum(j) <= config.fieldFlowBranchLimit() + then allowsFieldFlow = true + else allowsFieldFlow = false + ) +} + +/** + * Holds if `node` is reachable from a source in the configuration `config`. + * The Boolean `stored` records whether the tracked value is stored into a + * field of `node`. + * + * The Boolean `fromArg` records whether the node is reached through an + * argument in a call, and if so, `argStored` records whether the tracked + * value was stored into a field of the argument. + */ +private predicate nodeCandFwd2( + Node node, boolean fromArg, BooleanOption argStored, boolean stored, Configuration config +) { + nodeCand1(node, config) and + config.isSource(node) and + fromArg = false and + argStored = TBooleanNone() and + stored = false + or + nodeCand1(node, unbind(config)) and + ( + exists(Node mid | + nodeCandFwd2(mid, fromArg, argStored, stored, config) and + localFlowStepNodeCand1(mid, node, config) + ) + or + exists(Node mid | + nodeCandFwd2(mid, fromArg, argStored, stored, config) and + additionalLocalFlowStepNodeCand1(mid, node, config) and + stored = false + ) + or + exists(Node mid | + nodeCandFwd2(mid, _, _, stored, config) and + jumpStep(mid, node, config) and + fromArg = false and + argStored = TBooleanNone() + ) + or + exists(Node mid | + nodeCandFwd2(mid, _, _, stored, config) and + additionalJumpStep(mid, node, config) and + fromArg = false and + argStored = TBooleanNone() and + stored = false + ) + or + // store + exists(Node mid | + nodeCandFwd2(mid, fromArg, argStored, _, config) and + storeCand1(mid, _, node, config) and + stored = true + ) + or + // read + exists(Content c | + nodeCandFwd2Read(c, node, fromArg, argStored, config) and + nodeCandFwd2IsStored(c, stored, config) + ) + or + // flow into a callable + nodeCandFwd2In(_, node, _, _, stored, config) and + fromArg = true and + if parameterThroughFlowNodeCand1(node, config) + then argStored = TBooleanSome(stored) + else argStored = TBooleanNone() + or + // flow out of a callable + exists(DataFlowCall call | + nodeCandFwd2Out(call, node, fromArg, argStored, stored, config) and + fromArg = false + or + exists(boolean argStored0 | + nodeCandFwd2OutFromArg(call, node, argStored0, stored, config) and + nodeCandFwd2IsEntered(call, fromArg, argStored, argStored0, config) + ) + ) + ) +} + +/** + * Holds if `c` is the target of a store in the flow covered by `nodeCandFwd2`. + */ +pragma[noinline] +private predicate nodeCandFwd2IsStored(Content c, boolean stored, Configuration config) { + exists(Node mid, Node node | + useFieldFlow(config) and + nodeCand1(node, unbind(config)) and + nodeCandFwd2(mid, _, _, stored, config) and + storeCand1(mid, c, node, config) + ) +} + +pragma[nomagic] +private predicate nodeCandFwd2Read( + Content c, Node node, boolean fromArg, BooleanOption argStored, Configuration config +) { + exists(Node mid | + nodeCandFwd2(mid, fromArg, argStored, true, config) and + read(mid, c, node, config) + ) +} + +pragma[nomagic] +private predicate nodeCandFwd2In( + DataFlowCall call, ParameterNode p, boolean fromArg, BooleanOption argStored, boolean stored, + Configuration config +) { + exists(ArgumentNode arg, boolean allowsFieldFlow | + nodeCandFwd2(arg, fromArg, argStored, stored, config) and + flowIntoCallNodeCand1(call, arg, p, allowsFieldFlow, config) + | + stored = false or allowsFieldFlow = true + ) +} + +pragma[nomagic] +private predicate nodeCandFwd2Out( + DataFlowCall call, Node out, boolean fromArg, BooleanOption argStored, boolean stored, + Configuration config +) { + exists(ReturnNodeExt ret, boolean allowsFieldFlow | + nodeCandFwd2(ret, fromArg, argStored, stored, config) and + flowOutOfCallNodeCand1(call, ret, out, allowsFieldFlow, config) + | + stored = false or allowsFieldFlow = true + ) +} + +pragma[nomagic] +private predicate nodeCandFwd2OutFromArg( + DataFlowCall call, Node out, boolean argStored, boolean stored, Configuration config +) { + nodeCandFwd2Out(call, out, true, TBooleanSome(argStored), stored, config) +} + +/** + * Holds if an argument to `call` is reached in the flow covered by `nodeCandFwd2`. + */ +pragma[nomagic] +private predicate nodeCandFwd2IsEntered( + DataFlowCall call, boolean fromArg, BooleanOption argStored, boolean stored, Configuration config +) { + exists(ParameterNode p | + nodeCandFwd2In(call, p, fromArg, argStored, stored, config) and + parameterThroughFlowNodeCand1(p, config) + ) +} + +/** + * Holds if `node` is part of a path from a source to a sink in the + * configuration `config`. The Boolean `read` records whether the tracked + * value must be read from a field of `node` in order to reach a sink. + * + * The Boolean `toReturn` records whether the node must be returned from + * the enclosing callable in order to reach a sink, and if so, `returnRead` + * records whether a field must be read from the returned value. + */ +private predicate nodeCand2( + Node node, boolean toReturn, BooleanOption returnRead, boolean read, Configuration config +) { + nodeCandFwd2(node, _, _, false, config) and + config.isSink(node) and + toReturn = false and + returnRead = TBooleanNone() and + read = false + or + nodeCandFwd2(node, _, _, unbindBool(read), unbind(config)) and + ( + exists(Node mid | + localFlowStepNodeCand1(node, mid, config) and + nodeCand2(mid, toReturn, returnRead, read, config) + ) + or + exists(Node mid | + additionalLocalFlowStepNodeCand1(node, mid, config) and + nodeCand2(mid, toReturn, returnRead, read, config) and + read = false + ) + or + exists(Node mid | + jumpStep(node, mid, config) and + nodeCand2(mid, _, _, read, config) and + toReturn = false and + returnRead = TBooleanNone() + ) + or + exists(Node mid | + additionalJumpStep(node, mid, config) and + nodeCand2(mid, _, _, read, config) and + toReturn = false and + returnRead = TBooleanNone() and + read = false + ) + or + // store + exists(Content c | + nodeCand2Store(c, node, toReturn, returnRead, read, config) and + nodeCand2IsRead(c, read, config) + ) + or + // read + exists(Node mid, Content c, boolean read0 | + read(node, c, mid, config) and + nodeCandFwd2IsStored(c, unbindBool(read0), unbind(config)) and + nodeCand2(mid, toReturn, returnRead, read0, config) and + read = true + ) + or + // flow into a callable + exists(DataFlowCall call | + nodeCand2In(call, node, toReturn, returnRead, read, config) and + toReturn = false + or + exists(boolean returnRead0 | + nodeCand2InToReturn(call, node, returnRead0, read, config) and + nodeCand2IsReturned(call, toReturn, returnRead, returnRead0, config) + ) + ) + or + // flow out of a callable + nodeCand2Out(_, node, _, _, read, config) and + toReturn = true and + if nodeCandFwd2(node, true, TBooleanSome(_), unbindBool(read), config) + then returnRead = TBooleanSome(read) + else returnRead = TBooleanNone() + ) +} + +/** + * Holds if `c` is the target of a read in the flow covered by `nodeCand2`. + */ +pragma[noinline] +private predicate nodeCand2IsRead(Content c, boolean read, Configuration config) { + exists(Node mid, Node node | + useFieldFlow(config) and + nodeCandFwd2(node, _, _, true, unbind(config)) and + read(node, c, mid, config) and + nodeCandFwd2IsStored(c, unbindBool(read), unbind(config)) and + nodeCand2(mid, _, _, read, config) + ) +} + +pragma[nomagic] +private predicate nodeCand2Store( + Content c, Node node, boolean toReturn, BooleanOption returnRead, boolean stored, + Configuration config +) { + exists(Node mid | + storeCand1(node, c, mid, config) and + nodeCand2(mid, toReturn, returnRead, true, config) and + nodeCandFwd2(node, _, _, stored, unbind(config)) + ) +} + +/** + * Holds if `c` is the target of a store in the flow covered by `nodeCand2`. + */ +pragma[nomagic] +private predicate nodeCand2IsStored(Content c, boolean stored, Configuration conf) { + exists(Node node | + nodeCand2Store(c, node, _, _, stored, conf) and + nodeCand2(node, _, _, stored, conf) + ) +} + +/** + * Holds if `c` is the target of both a store and a read in the path graph + * covered by `nodeCand2`. + */ +pragma[noinline] +private predicate nodeCand2IsReadAndStored(Content c, Configuration conf) { + exists(boolean apNonEmpty | + nodeCand2IsStored(c, apNonEmpty, conf) and + nodeCand2IsRead(c, apNonEmpty, conf) + ) +} + +pragma[nomagic] +private predicate nodeCand2Out( + DataFlowCall call, ReturnNodeExt ret, boolean toReturn, BooleanOption returnRead, boolean read, + Configuration config +) { + exists(Node out, boolean allowsFieldFlow | + nodeCand2(out, toReturn, returnRead, read, config) and + flowOutOfCallNodeCand1(call, ret, out, allowsFieldFlow, config) + | + read = false or allowsFieldFlow = true + ) +} + +pragma[nomagic] +private predicate nodeCand2In( + DataFlowCall call, ArgumentNode arg, boolean toReturn, BooleanOption returnRead, boolean read, + Configuration config +) { + exists(ParameterNode p, boolean allowsFieldFlow | + nodeCand2(p, toReturn, returnRead, read, config) and + flowIntoCallNodeCand1(call, arg, p, allowsFieldFlow, config) + | + read = false or allowsFieldFlow = true + ) +} + +pragma[nomagic] +private predicate nodeCand2InToReturn( + DataFlowCall call, ArgumentNode arg, boolean returnRead, boolean read, Configuration config +) { + nodeCand2In(call, arg, true, TBooleanSome(returnRead), read, config) +} + +/** + * Holds if an output from `call` is reached in the flow covered by `nodeCand2`. + */ +pragma[nomagic] +private predicate nodeCand2IsReturned( + DataFlowCall call, boolean toReturn, BooleanOption returnRead, boolean read, Configuration config +) { + exists(ReturnNodeExt ret | + nodeCand2Out(call, ret, toReturn, returnRead, read, config) and + nodeCandFwd2(ret, true, TBooleanSome(_), read, config) + ) +} + +private predicate nodeCand2(Node node, Configuration config) { nodeCand2(node, _, _, _, config) } + +pragma[nomagic] +private predicate flowOutOfCallNodeCand2( + DataFlowCall call, ReturnNodeExt node1, Node node2, boolean allowsFieldFlow, Configuration config +) { + flowOutOfCallNodeCand1(call, node1, node2, allowsFieldFlow, config) and + nodeCand2(node2, config) and + nodeCand2(node1, unbind(config)) +} + +pragma[nomagic] +private predicate flowIntoCallNodeCand2( + DataFlowCall call, ArgumentNode node1, ParameterNode node2, boolean allowsFieldFlow, + Configuration config +) { + flowIntoCallNodeCand1(call, node1, node2, allowsFieldFlow, config) and + nodeCand2(node2, config) and + nodeCand2(node1, unbind(config)) +} + +private module LocalFlowBigStep { + /** + * A node where some checking is required, and hence the big-step relation + * is not allowed to step over. + */ + private class FlowCheckNode extends Node { + FlowCheckNode() { + this instanceof CastNode or + clearsContent(this, _) + } + } + + /** + * Holds if `node` can be the first node in a maximal subsequence of local + * flow steps in a dataflow path. + */ + private predicate localFlowEntry(Node node, Configuration config) { + nodeCand2(node, config) and + ( + config.isSource(node) or + jumpStep(_, node, config) or + additionalJumpStep(_, node, config) or + node instanceof ParameterNode or + node instanceof OutNodeExt or + store(_, _, node, _) or + read(_, _, node) or + node instanceof FlowCheckNode + ) + } + + /** + * Holds if `node` can be the last node in a maximal subsequence of local + * flow steps in a dataflow path. + */ + private predicate localFlowExit(Node node, Configuration config) { + exists(Node next | nodeCand2(next, config) | + jumpStep(node, next, config) or + additionalJumpStep(node, next, config) or + flowIntoCallNodeCand1(_, node, next, config) or + flowOutOfCallNodeCand1(_, node, next, config) or + store(node, _, next, _) or + read(node, _, next) + ) + or + node instanceof FlowCheckNode + or + config.isSink(node) + } + + pragma[noinline] + private predicate additionalLocalFlowStepNodeCand2(Node node1, Node node2, Configuration config) { + additionalLocalFlowStepNodeCand1(node1, node2, config) and + nodeCand2(node1, _, _, false, config) and + nodeCand2(node2, _, _, false, unbind(config)) + } + + /** + * Holds if the local path from `node1` to `node2` is a prefix of a maximal + * subsequence of local flow steps in a dataflow path. + * + * This is the transitive closure of `[additional]localFlowStep` beginning + * at `localFlowEntry`. + */ + pragma[nomagic] + private predicate localFlowStepPlus( + Node node1, Node node2, boolean preservesValue, DataFlowType t, Configuration config, + LocalCallContext cc + ) { + not isUnreachableInCall(node2, cc.(LocalCallContextSpecificCall).getCall()) and + ( + localFlowEntry(node1, config) and + ( + localFlowStepNodeCand1(node1, node2, config) and + preservesValue = true and + t = getNodeType(node1) + or + additionalLocalFlowStepNodeCand2(node1, node2, config) and + preservesValue = false and + t = getNodeType(node2) + ) and + node1 != node2 and + cc.relevantFor(node1.getEnclosingCallable()) and + not isUnreachableInCall(node1, cc.(LocalCallContextSpecificCall).getCall()) and + nodeCand2(node2, unbind(config)) + or + exists(Node mid | + localFlowStepPlus(node1, mid, preservesValue, t, config, cc) and + localFlowStepNodeCand1(mid, node2, config) and + not mid instanceof FlowCheckNode and + nodeCand2(node2, unbind(config)) + ) + or + exists(Node mid | + localFlowStepPlus(node1, mid, _, _, config, cc) and + additionalLocalFlowStepNodeCand2(mid, node2, config) and + not mid instanceof FlowCheckNode and + preservesValue = false and + t = getNodeType(node2) and + nodeCand2(node2, unbind(config)) + ) + ) + } + + /** + * Holds if `node1` can step to `node2` in one or more local steps and this + * path can occur as a maximal subsequence of local steps in a dataflow path. + */ + pragma[nomagic] + predicate localFlowBigStep( + Node node1, Node node2, boolean preservesValue, AccessPathFrontNil apf, Configuration config, + LocalCallContext callContext + ) { + localFlowStepPlus(node1, node2, preservesValue, apf.getType(), config, callContext) and + localFlowExit(node2, config) + } +} + +private import LocalFlowBigStep + +pragma[nomagic] +private predicate readCand2(Node node1, Content c, Node node2, Configuration config) { + read(node1, c, node2, config) and + nodeCand2(node1, _, _, true, unbind(config)) and + nodeCand2(node2, config) and + nodeCand2IsReadAndStored(c, unbind(config)) +} + +pragma[nomagic] +private predicate storeCand2( + Node node1, TypedContent tc, Node node2, DataFlowType contentType, Configuration config +) { + store(node1, tc, node2, contentType) and + nodeCand2(node1, config) and + nodeCand2(node2, _, _, true, unbind(config)) and + nodeCand2IsReadAndStored(tc.getContent(), unbind(config)) +} + +/** + * Holds if `node` is reachable with access path front `apf` from a + * source in the configuration `config`. + * + * The Boolean `fromArg` records whether the node is reached through an + * argument in a call, and if so, `argApf` records the front of the + * access path of that argument. + */ +pragma[nomagic] +private predicate flowCandFwd( + Node node, boolean fromArg, AccessPathFrontOption argApf, AccessPathFront apf, + Configuration config +) { + flowCandFwd0(node, fromArg, argApf, apf, config) and + not apf.isClearedAt(node) and + if node instanceof CastingNode then compatibleTypes(getNodeType(node), apf.getType()) else any() +} + +pragma[nomagic] +private predicate flowCandFwd0( + Node node, boolean fromArg, AccessPathFrontOption argApf, AccessPathFront apf, + Configuration config +) { + nodeCand2(node, _, _, false, config) and + config.isSource(node) and + fromArg = false and + argApf = TAccessPathFrontNone() and + apf = TFrontNil(getNodeType(node)) + or + exists(Node mid | + flowCandFwd(mid, fromArg, argApf, apf, config) and + localFlowBigStep(mid, node, true, _, config, _) + ) + or + exists(Node mid, AccessPathFrontNil nil | + flowCandFwd(mid, fromArg, argApf, nil, config) and + localFlowBigStep(mid, node, false, apf, config, _) + ) + or + exists(Node mid | + flowCandFwd(mid, _, _, apf, config) and + nodeCand2(node, unbind(config)) and + jumpStep(mid, node, config) and + fromArg = false and + argApf = TAccessPathFrontNone() + ) + or + exists(Node mid, AccessPathFrontNil nil | + flowCandFwd(mid, _, _, nil, config) and + nodeCand2(node, unbind(config)) and + additionalJumpStep(mid, node, config) and + fromArg = false and + argApf = TAccessPathFrontNone() and + apf = TFrontNil(getNodeType(node)) + ) + or + // store + exists(Node mid, TypedContent tc, AccessPathFront apf0, DataFlowType contentType | + flowCandFwd(mid, fromArg, argApf, apf0, config) and + storeCand2(mid, tc, node, contentType, config) and + nodeCand2(node, _, _, true, unbind(config)) and + apf.headUsesContent(tc) and + compatibleTypes(apf0.getType(), contentType) + ) + or + // read + exists(TypedContent tc | + flowCandFwdRead(tc, node, fromArg, argApf, config) and + flowCandFwdConsCand(tc, apf, config) and + nodeCand2(node, _, _, unbindBool(apf.toBoolNonEmpty()), unbind(config)) + ) + or + // flow into a callable + flowCandFwdIn(_, node, _, _, apf, config) and + fromArg = true and + if nodeCand2(node, true, _, unbindBool(apf.toBoolNonEmpty()), config) + then argApf = TAccessPathFrontSome(apf) + else argApf = TAccessPathFrontNone() + or + // flow out of a callable + exists(DataFlowCall call | + flowCandFwdOut(call, node, fromArg, argApf, apf, config) and + fromArg = false + or + exists(AccessPathFront argApf0 | + flowCandFwdOutFromArg(call, node, argApf0, apf, config) and + flowCandFwdIsEntered(call, fromArg, argApf, argApf0, config) + ) + ) +} + +pragma[nomagic] +private predicate flowCandFwdConsCand(TypedContent tc, AccessPathFront apf, Configuration config) { + exists(Node mid, Node n, DataFlowType contentType | + flowCandFwd(mid, _, _, apf, config) and + storeCand2(mid, tc, n, contentType, config) and + nodeCand2(n, _, _, true, unbind(config)) and + compatibleTypes(apf.getType(), contentType) + ) +} + +pragma[nomagic] +private predicate flowCandFwdRead0( + Node node1, TypedContent tc, Content c, Node node2, boolean fromArg, AccessPathFrontOption argApf, + AccessPathFrontHead apf, Configuration config +) { + flowCandFwd(node1, fromArg, argApf, apf, config) and + readCand2(node1, c, node2, config) and + apf.headUsesContent(tc) +} + +pragma[nomagic] +private predicate flowCandFwdRead( + TypedContent tc, Node node, boolean fromArg, AccessPathFrontOption argApf, Configuration config +) { + flowCandFwdRead0(_, tc, tc.getContent(), node, fromArg, argApf, _, config) +} + +pragma[nomagic] +private predicate flowCandFwdIn( + DataFlowCall call, ParameterNode p, boolean fromArg, AccessPathFrontOption argApf, + AccessPathFront apf, Configuration config +) { + exists(ArgumentNode arg, boolean allowsFieldFlow | + flowCandFwd(arg, fromArg, argApf, apf, config) and + flowIntoCallNodeCand2(call, arg, p, allowsFieldFlow, config) + | + apf instanceof AccessPathFrontNil or allowsFieldFlow = true + ) +} + +pragma[nomagic] +private predicate flowCandFwdOut( + DataFlowCall call, Node node, boolean fromArg, AccessPathFrontOption argApf, AccessPathFront apf, + Configuration config +) { + exists(ReturnNodeExt ret, boolean allowsFieldFlow | + flowCandFwd(ret, fromArg, argApf, apf, config) and + flowOutOfCallNodeCand2(call, ret, node, allowsFieldFlow, config) + | + apf instanceof AccessPathFrontNil or allowsFieldFlow = true + ) +} + +pragma[nomagic] +private predicate flowCandFwdOutFromArg( + DataFlowCall call, Node node, AccessPathFront argApf, AccessPathFront apf, Configuration config +) { + flowCandFwdOut(call, node, true, TAccessPathFrontSome(argApf), apf, config) +} + +/** + * Holds if an argument to `call` is reached in the flow covered by `flowCandFwd`. + */ +pragma[nomagic] +private predicate flowCandFwdIsEntered( + DataFlowCall call, boolean fromArg, AccessPathFrontOption argApf, AccessPathFront apf, + Configuration config +) { + exists(ParameterNode p | + flowCandFwdIn(call, p, fromArg, argApf, apf, config) and + nodeCand2(p, true, TBooleanSome(_), unbindBool(apf.toBoolNonEmpty()), config) + ) +} + +/** + * Holds if `node` with access path front `apf` is part of a path from a + * source to a sink in the configuration `config`. + * + * The Boolean `toReturn` records whether the node must be returned from + * the enclosing callable in order to reach a sink, and if so, `returnApf` + * records the front of the access path of the returned value. + */ +pragma[nomagic] +private predicate flowCand( + Node node, boolean toReturn, AccessPathFrontOption returnApf, AccessPathFront apf, + Configuration config +) { + flowCand0(node, toReturn, returnApf, apf, config) and + flowCandFwd(node, _, _, apf, config) +} + +pragma[nomagic] +private predicate flowCand0( + Node node, boolean toReturn, AccessPathFrontOption returnApf, AccessPathFront apf, + Configuration config +) { + flowCandFwd(node, _, _, apf, config) and + config.isSink(node) and + toReturn = false and + returnApf = TAccessPathFrontNone() and + apf instanceof AccessPathFrontNil + or + exists(Node mid | + localFlowBigStep(node, mid, true, _, config, _) and + flowCand(mid, toReturn, returnApf, apf, config) + ) + or + exists(Node mid, AccessPathFrontNil nil | + flowCandFwd(node, _, _, apf, config) and + localFlowBigStep(node, mid, false, _, config, _) and + flowCand(mid, toReturn, returnApf, nil, config) and + apf instanceof AccessPathFrontNil + ) + or + exists(Node mid | + jumpStep(node, mid, config) and + flowCand(mid, _, _, apf, config) and + toReturn = false and + returnApf = TAccessPathFrontNone() + ) + or + exists(Node mid, AccessPathFrontNil nil | + flowCandFwd(node, _, _, apf, config) and + additionalJumpStep(node, mid, config) and + flowCand(mid, _, _, nil, config) and + toReturn = false and + returnApf = TAccessPathFrontNone() and + apf instanceof AccessPathFrontNil + ) + or + // store + exists(TypedContent tc | + flowCandStore(node, tc, apf, toReturn, returnApf, config) and + flowCandConsCand(tc, apf, config) + ) + or + // read + exists(TypedContent tc, AccessPathFront apf0 | + flowCandRead(node, tc, apf, toReturn, returnApf, apf0, config) and + flowCandFwdConsCand(tc, apf0, config) + ) + or + // flow into a callable + exists(DataFlowCall call | + flowCandIn(call, node, toReturn, returnApf, apf, config) and + toReturn = false + or + exists(AccessPathFront returnApf0 | + flowCandInToReturn(call, node, returnApf0, apf, config) and + flowCandIsReturned(call, toReturn, returnApf, returnApf0, config) + ) + ) + or + // flow out of a callable + flowCandOut(_, node, _, _, apf, config) and + toReturn = true and + if flowCandFwd(node, true, _, apf, config) + then returnApf = TAccessPathFrontSome(apf) + else returnApf = TAccessPathFrontNone() +} + +pragma[nomagic] +private predicate readCandFwd( + Node node1, TypedContent tc, AccessPathFront apf, Node node2, Configuration config +) { + flowCandFwdRead0(node1, tc, tc.getContent(), node2, _, _, apf, config) +} + +pragma[nomagic] +private predicate flowCandRead( + Node node, TypedContent tc, AccessPathFront apf, boolean toReturn, + AccessPathFrontOption returnApf, AccessPathFront apf0, Configuration config +) { + exists(Node mid | + readCandFwd(node, tc, apf, mid, config) and + flowCand(mid, toReturn, returnApf, apf0, config) + ) +} + +pragma[nomagic] +private predicate flowCandStore( + Node node, TypedContent tc, AccessPathFront apf, boolean toReturn, + AccessPathFrontOption returnApf, Configuration config +) { + exists(Node mid | + flowCandFwd(node, _, _, apf, config) and + storeCand2(node, tc, mid, _, unbind(config)) and + flowCand(mid, toReturn, returnApf, TFrontHead(tc), unbind(config)) + ) +} + +pragma[nomagic] +private predicate flowCandConsCand(TypedContent tc, AccessPathFront apf, Configuration config) { + flowCandFwdConsCand(tc, apf, config) and + flowCandRead(_, tc, _, _, _, apf, config) +} + +pragma[nomagic] +private predicate flowCandOut( + DataFlowCall call, ReturnNodeExt ret, boolean toReturn, AccessPathFrontOption returnApf, + AccessPathFront apf, Configuration config +) { + exists(Node out, boolean allowsFieldFlow | + flowCand(out, toReturn, returnApf, apf, config) and + flowOutOfCallNodeCand2(call, ret, out, allowsFieldFlow, config) + | + apf instanceof AccessPathFrontNil or allowsFieldFlow = true + ) +} + +pragma[nomagic] +private predicate flowCandIn( + DataFlowCall call, ArgumentNode arg, boolean toReturn, AccessPathFrontOption returnApf, + AccessPathFront apf, Configuration config +) { + exists(ParameterNode p, boolean allowsFieldFlow | + flowCand(p, toReturn, returnApf, apf, config) and + flowIntoCallNodeCand2(call, arg, p, allowsFieldFlow, config) + | + apf instanceof AccessPathFrontNil or allowsFieldFlow = true + ) +} + +pragma[nomagic] +private predicate flowCandInToReturn( + DataFlowCall call, ArgumentNode arg, AccessPathFront returnApf, AccessPathFront apf, + Configuration config +) { + flowCandIn(call, arg, true, TAccessPathFrontSome(returnApf), apf, config) +} + +/** + * Holds if an output from `call` is reached in the flow covered by `flowCand`. + */ +pragma[nomagic] +private predicate flowCandIsReturned( + DataFlowCall call, boolean toReturn, AccessPathFrontOption returnApf, AccessPathFront apf, + Configuration config +) { + exists(ReturnNodeExt ret | + flowCandOut(call, ret, toReturn, returnApf, apf, config) and + flowCandFwd(ret, true, TAccessPathFrontSome(_), apf, config) + ) +} + +private newtype TAccessPath = + TNil(DataFlowType t) or + TConsNil(TypedContent tc, DataFlowType t) { flowCandConsCand(tc, TFrontNil(t), _) } or + TConsCons(TypedContent tc1, TypedContent tc2, int len) { + flowCandConsCand(tc1, TFrontHead(tc2), _) and len in [2 .. accessPathLimit()] + } + +/** + * Conceptually a list of `TypedContent`s followed by a `Type`, but only the first two + * elements of the list and its length are tracked. If data flows from a source to + * a given node with a given `AccessPath`, this indicates the sequence of + * dereference operations needed to get from the value in the node to the + * tracked object. The final type indicates the type of the tracked object. + */ +abstract private class AccessPath extends TAccessPath { + abstract string toString(); + + abstract TypedContent getHead(); + + abstract int len(); + + abstract DataFlowType getType(); + + abstract AccessPathFront getFront(); + + /** + * Holds if this access path has `head` at the front and may be followed by `tail`. + */ + abstract predicate pop(TypedContent head, AccessPath tail); +} + +private class AccessPathNil extends AccessPath, TNil { + private DataFlowType t; + + AccessPathNil() { this = TNil(t) } + + override string toString() { result = concat(": " + ppReprType(t)) } + + override TypedContent getHead() { none() } + + override int len() { result = 0 } + + override DataFlowType getType() { result = t } + + override AccessPathFront getFront() { result = TFrontNil(t) } + + override predicate pop(TypedContent head, AccessPath tail) { none() } +} + +abstract private class AccessPathCons extends AccessPath { } + +private class AccessPathConsNil extends AccessPathCons, TConsNil { + private TypedContent tc; + private DataFlowType t; + + AccessPathConsNil() { this = TConsNil(tc, t) } + + override string toString() { + // The `concat` becomes "" if `ppReprType` has no result. + result = "[" + tc.toString() + "]" + concat(" : " + ppReprType(t)) + } + + override TypedContent getHead() { result = tc } + + override int len() { result = 1 } + + override DataFlowType getType() { result = tc.getContainerType() } + + override AccessPathFront getFront() { result = TFrontHead(tc) } + + override predicate pop(TypedContent head, AccessPath tail) { head = tc and tail = TNil(t) } +} + +private class AccessPathConsCons extends AccessPathCons, TConsCons { + private TypedContent tc1; + private TypedContent tc2; + private int len; + + AccessPathConsCons() { this = TConsCons(tc1, tc2, len) } + + override string toString() { + if len = 2 + then result = "[" + tc1.toString() + ", " + tc2.toString() + "]" + else result = "[" + tc1.toString() + ", " + tc2.toString() + ", ... (" + len.toString() + ")]" + } + + override TypedContent getHead() { result = tc1 } + + override int len() { result = len } + + override DataFlowType getType() { result = tc1.getContainerType() } + + override AccessPathFront getFront() { result = TFrontHead(tc1) } + + override predicate pop(TypedContent head, AccessPath tail) { + head = tc1 and + ( + tail = TConsCons(tc2, _, len - 1) + or + len = 2 and + tail = TConsNil(tc2, _) + ) + } +} + +/** Gets the access path obtained by popping `tc` from `ap`, if any. */ +private AccessPath pop(TypedContent tc, AccessPath ap) { ap.pop(tc, result) } + +/** Gets the access path obtained by pushing `tc` onto `ap`. */ +private AccessPath push(TypedContent tc, AccessPath ap) { ap = pop(tc, result) } + +private newtype TAccessPathOption = + TAccessPathNone() or + TAccessPathSome(AccessPath ap) + +private class AccessPathOption extends TAccessPathOption { + string toString() { + this = TAccessPathNone() and result = "" + or + this = TAccessPathSome(any(AccessPath ap | result = ap.toString())) + } +} + +/** + * Holds if `node` is reachable with access path `ap` from a source in + * the configuration `config`. + * + * The Boolean `fromArg` records whether the node is reached through an + * argument in a call, and if so, `argAp` records the access path of that + * argument. + */ +private predicate flowFwd( + Node node, boolean fromArg, AccessPathOption argAp, AccessPathFront apf, AccessPath ap, + Configuration config +) { + flowFwd0(node, fromArg, argAp, apf, ap, config) and + flowCand(node, _, _, apf, config) +} + +private predicate flowFwd0( + Node node, boolean fromArg, AccessPathOption argAp, AccessPathFront apf, AccessPath ap, + Configuration config +) { + flowCand(node, _, _, _, config) and + config.isSource(node) and + fromArg = false and + argAp = TAccessPathNone() and + ap = TNil(getNodeType(node)) and + apf = ap.(AccessPathNil).getFront() + or + flowCand(node, _, _, _, unbind(config)) and + ( + exists(Node mid | + flowFwd(mid, fromArg, argAp, apf, ap, config) and + localFlowBigStep(mid, node, true, _, config, _) + ) + or + exists(Node mid, AccessPathNil nil | + flowFwd(mid, fromArg, argAp, _, nil, config) and + localFlowBigStep(mid, node, false, apf, config, _) and + apf = ap.(AccessPathNil).getFront() + ) + or + exists(Node mid | + flowFwd(mid, _, _, apf, ap, config) and + jumpStep(mid, node, config) and + fromArg = false and + argAp = TAccessPathNone() + ) + or + exists(Node mid, AccessPathNil nil | + flowFwd(mid, _, _, _, nil, config) and + additionalJumpStep(mid, node, config) and + fromArg = false and + argAp = TAccessPathNone() and + ap = TNil(getNodeType(node)) and + apf = ap.(AccessPathNil).getFront() + ) + ) + or + // store + exists(TypedContent tc | flowFwdStore(node, tc, pop(tc, ap), apf, fromArg, argAp, config)) + or + // read + exists(TypedContent tc | + flowFwdRead(node, _, push(tc, ap), apf, fromArg, argAp, config) and + flowFwdConsCand(tc, apf, ap, config) + ) + or + // flow into a callable + flowFwdIn(_, node, _, _, apf, ap, config) and + fromArg = true and + if flowCand(node, true, _, apf, config) + then argAp = TAccessPathSome(ap) + else argAp = TAccessPathNone() + or + // flow out of a callable + exists(DataFlowCall call | + flowFwdOut(call, node, fromArg, argAp, apf, ap, config) and + fromArg = false + or + exists(AccessPath argAp0 | + flowFwdOutFromArg(call, node, argAp0, apf, ap, config) and + flowFwdIsEntered(call, fromArg, argAp, argAp0, config) + ) + ) +} + +pragma[nomagic] +private predicate flowFwdStore( + Node node, TypedContent tc, AccessPath ap0, AccessPathFront apf, boolean fromArg, + AccessPathOption argAp, Configuration config +) { + exists(Node mid, AccessPathFront apf0 | + flowFwd(mid, fromArg, argAp, apf0, ap0, config) and + flowFwdStore0(mid, tc, node, apf0, apf, config) + ) +} + +pragma[nomagic] +private predicate storeCand( + Node mid, TypedContent tc, Node node, AccessPathFront apf0, AccessPathFront apf, + Configuration config +) { + storeCand2(mid, tc, node, _, config) and + flowCand(mid, _, _, apf0, config) and + apf.headUsesContent(tc) +} + +pragma[noinline] +private predicate flowFwdStore0( + Node mid, TypedContent tc, Node node, AccessPathFront apf0, AccessPathFrontHead apf, + Configuration config +) { + storeCand(mid, tc, node, apf0, apf, config) and + flowCandConsCand(tc, apf0, config) and + flowCand(node, _, _, apf, unbind(config)) +} + +pragma[nomagic] +private predicate flowFwdRead0( + Node node1, TypedContent tc, AccessPathFrontHead apf0, AccessPath ap0, Node node2, + boolean fromArg, AccessPathOption argAp, Configuration config +) { + flowFwd(node1, fromArg, argAp, apf0, ap0, config) and + readCandFwd(node1, tc, apf0, node2, config) +} + +pragma[nomagic] +private predicate flowFwdRead( + Node node, AccessPathFrontHead apf0, AccessPath ap0, AccessPathFront apf, boolean fromArg, + AccessPathOption argAp, Configuration config +) { + exists(Node mid, TypedContent tc | + flowFwdRead0(mid, tc, apf0, ap0, node, fromArg, argAp, config) and + flowCand(node, _, _, apf, unbind(config)) and + flowCandConsCand(tc, apf, unbind(config)) + ) +} + +pragma[nomagic] +private predicate flowFwdConsCand( + TypedContent tc, AccessPathFront apf, AccessPath ap, Configuration config +) { + exists(Node n | + flowFwd(n, _, _, apf, ap, config) and + flowFwdStore0(n, tc, _, apf, _, config) + ) +} + +pragma[nomagic] +private predicate flowFwdIn( + DataFlowCall call, ParameterNode p, boolean fromArg, AccessPathOption argAp, AccessPathFront apf, + AccessPath ap, Configuration config +) { + exists(ArgumentNode arg, boolean allowsFieldFlow | + flowFwd(arg, fromArg, argAp, apf, ap, config) and + flowIntoCallNodeCand2(call, arg, p, allowsFieldFlow, config) and + flowCand(p, _, _, _, unbind(config)) + | + ap instanceof AccessPathNil or allowsFieldFlow = true + ) +} + +pragma[nomagic] +private predicate flowFwdOut( + DataFlowCall call, Node node, boolean fromArg, AccessPathOption argAp, AccessPathFront apf, + AccessPath ap, Configuration config +) { + exists(ReturnNodeExt ret, boolean allowsFieldFlow | + flowFwd(ret, fromArg, argAp, apf, ap, config) and + flowOutOfCallNodeCand2(call, ret, node, allowsFieldFlow, config) and + flowCand(node, _, _, _, unbind(config)) + | + ap instanceof AccessPathNil or allowsFieldFlow = true + ) +} + +pragma[nomagic] +private predicate flowFwdOutFromArg( + DataFlowCall call, Node node, AccessPath argAp, AccessPathFront apf, AccessPath ap, + Configuration config +) { + flowFwdOut(call, node, true, TAccessPathSome(argAp), apf, ap, config) +} + +/** + * Holds if an argument to `call` is reached in the flow covered by `flowFwd`. + */ +pragma[nomagic] +private predicate flowFwdIsEntered( + DataFlowCall call, boolean fromArg, AccessPathOption argAp, AccessPath ap, Configuration config +) { + exists(ParameterNode p, AccessPathFront apf | + flowFwdIn(call, p, fromArg, argAp, apf, ap, config) and + flowCand(p, true, TAccessPathFrontSome(_), apf, config) + ) +} + +/** + * Holds if `node` with access path `ap` is part of a path from a source to + * a sink in the configuration `config`. + * + * The Boolean `toReturn` records whether the node must be returned from + * the enclosing callable in order to reach a sink, and if so, `returnAp` + * records the access path of the returned value. + */ +private predicate flow( + Node node, boolean toReturn, AccessPathOption returnAp, AccessPath ap, Configuration config +) { + flow0(node, toReturn, returnAp, ap, config) and + flowFwd(node, _, _, _, ap, config) +} + +private predicate flow0( + Node node, boolean toReturn, AccessPathOption returnAp, AccessPath ap, Configuration config +) { + flowFwd(node, _, _, _, ap, config) and + config.isSink(node) and + toReturn = false and + returnAp = TAccessPathNone() and + ap instanceof AccessPathNil + or + exists(Node mid | + localFlowBigStep(node, mid, true, _, config, _) and + flow(mid, toReturn, returnAp, ap, config) + ) + or + exists(Node mid, AccessPathNil nil | + flowFwd(node, _, _, _, ap, config) and + localFlowBigStep(node, mid, false, _, config, _) and + flow(mid, toReturn, returnAp, nil, config) and + ap instanceof AccessPathNil + ) + or + exists(Node mid | + jumpStep(node, mid, config) and + flow(mid, _, _, ap, config) and + toReturn = false and + returnAp = TAccessPathNone() + ) + or + exists(Node mid, AccessPathNil nil | + flowFwd(node, _, _, _, ap, config) and + additionalJumpStep(node, mid, config) and + flow(mid, _, _, nil, config) and + toReturn = false and + returnAp = TAccessPathNone() and + ap instanceof AccessPathNil + ) + or + // store + exists(TypedContent tc | + flowStore(tc, node, toReturn, returnAp, ap, config) and + flowConsCand(tc, ap, config) + ) + or + // read + exists(Node mid, AccessPath ap0 | + readFlowFwd(node, _, mid, ap, ap0, config) and + flow(mid, toReturn, returnAp, ap0, config) + ) + or + // flow into a callable + exists(DataFlowCall call | + flowIn(call, node, toReturn, returnAp, ap, config) and + toReturn = false + or + exists(AccessPath returnAp0 | + flowInToReturn(call, node, returnAp0, ap, config) and + flowIsReturned(call, toReturn, returnAp, returnAp0, config) + ) + ) + or + // flow out of a callable + flowOut(_, node, _, _, ap, config) and + toReturn = true and + if flowFwd(node, true, TAccessPathSome(_), _, ap, config) + then returnAp = TAccessPathSome(ap) + else returnAp = TAccessPathNone() +} + +pragma[nomagic] +private predicate storeFlowFwd( + Node node1, TypedContent tc, Node node2, AccessPath ap, AccessPath ap0, Configuration config +) { + storeCand2(node1, tc, node2, _, config) and + flowFwdStore(node2, tc, ap, _, _, _, config) and + ap0 = push(tc, ap) +} + +pragma[nomagic] +private predicate flowStore( + TypedContent tc, Node node, boolean toReturn, AccessPathOption returnAp, AccessPath ap, + Configuration config +) { + exists(Node mid, AccessPath ap0 | + storeFlowFwd(node, tc, mid, ap, ap0, config) and + flow(mid, toReturn, returnAp, ap0, config) + ) +} + +pragma[nomagic] +private predicate readFlowFwd( + Node node1, TypedContent tc, Node node2, AccessPath ap, AccessPath ap0, Configuration config +) { + exists(AccessPathFrontHead apf | + readCandFwd(node1, tc, apf, node2, config) and + flowFwdRead(node2, apf, ap, _, _, _, config) and + ap0 = pop(tc, ap) and + flowFwdConsCand(tc, _, ap0, unbind(config)) + ) +} + +pragma[nomagic] +private predicate flowConsCand(TypedContent tc, AccessPath ap, Configuration config) { + exists(Node n, Node mid | + flow(mid, _, _, ap, config) and + readFlowFwd(n, tc, mid, _, ap, config) + ) +} + +pragma[nomagic] +private predicate flowOut( + DataFlowCall call, ReturnNodeExt ret, boolean toReturn, AccessPathOption returnAp, AccessPath ap, + Configuration config +) { + exists(Node out, boolean allowsFieldFlow | + flow(out, toReturn, returnAp, ap, config) and + flowOutOfCallNodeCand2(call, ret, out, allowsFieldFlow, config) + | + ap instanceof AccessPathNil or allowsFieldFlow = true + ) +} + +pragma[nomagic] +private predicate flowIn( + DataFlowCall call, ArgumentNode arg, boolean toReturn, AccessPathOption returnAp, AccessPath ap, + Configuration config +) { + exists(ParameterNode p, boolean allowsFieldFlow | + flow(p, toReturn, returnAp, ap, config) and + flowIntoCallNodeCand2(call, arg, p, allowsFieldFlow, config) + | + ap instanceof AccessPathNil or allowsFieldFlow = true + ) +} + +pragma[nomagic] +private predicate flowInToReturn( + DataFlowCall call, ArgumentNode arg, AccessPath returnAp, AccessPath ap, Configuration config +) { + flowIn(call, arg, true, TAccessPathSome(returnAp), ap, config) +} + +/** + * Holds if an output from `call` is reached in the flow covered by `flow`. + */ +pragma[nomagic] +private predicate flowIsReturned( + DataFlowCall call, boolean toReturn, AccessPathOption returnAp, AccessPath ap, + Configuration config +) { + exists(ReturnNodeExt ret | + flowOut(call, ret, toReturn, returnAp, ap, config) and + flowFwd(ret, true, TAccessPathSome(_), _, ap, config) + ) +} + +bindingset[conf, result] +private Configuration unbind(Configuration conf) { result >= conf and result <= conf } + +private predicate flow(Node n, Configuration config) { flow(n, _, _, _, config) } + +pragma[noinline] +private predicate parameterFlow( + ParameterNode p, AccessPath ap, DataFlowCallable c, Configuration config +) { + flow(p, true, _, ap, config) and + c = p.getEnclosingCallable() +} + +private newtype TSummaryCtx = + TSummaryCtxNone() or + TSummaryCtxSome(ParameterNode p, AccessPath ap) { + exists(ReturnNodeExt ret, Configuration config, AccessPath ap0 | + parameterFlow(p, ap, ret.getEnclosingCallable(), config) and + flow(ret, true, TAccessPathSome(_), ap0, config) and + flowFwd(ret, true, TAccessPathSome(ap), _, ap0, config) + ) + } + +/** + * A context for generating flow summaries. This represents flow entry through + * a specific parameter with an access path of a specific shape. + * + * Summaries are only created for parameters that may flow through. + */ +abstract private class SummaryCtx extends TSummaryCtx { + abstract string toString(); +} + +/** A summary context from which no flow summary can be generated. */ +private class SummaryCtxNone extends SummaryCtx, TSummaryCtxNone { + override string toString() { result = "" } +} + +/** A summary context from which a flow summary can be generated. */ +private class SummaryCtxSome extends SummaryCtx, TSummaryCtxSome { + private ParameterNode p; + private AccessPath ap; + + SummaryCtxSome() { this = TSummaryCtxSome(p, ap) } + + int getParameterPos() { p.isParameterOf(_, result) } + + override string toString() { result = p + ": " + ap } + + predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + p.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } +} + +private newtype TPathNode = + TPathNodeMid(Node node, CallContext cc, SummaryCtx sc, AccessPath ap, Configuration config) { + // A PathNode is introduced by a source ... + flow(node, config) and + config.isSource(node) and + cc instanceof CallContextAny and + sc instanceof SummaryCtxNone and + ap = TNil(getNodeType(node)) + or + // ... or a step from an existing PathNode to another node. + exists(PathNodeMid mid | + pathStep(mid, node, cc, sc, ap) and + config = mid.getConfiguration() and + flow(node, _, _, ap, unbind(config)) + ) + } or + TPathNodeSink(Node node, Configuration config) { + config.isSink(node) and + flow(node, unbind(config)) and + ( + // A sink that is also a source ... + config.isSource(node) + or + // ... or a sink that can be reached from a source + exists(PathNodeMid mid | + pathStep(mid, node, _, _, any(AccessPathNil nil)) and + config = unbind(mid.getConfiguration()) + ) + ) + } + +/** + * A `Node` augmented with a call context (except for sinks), an access path, and a configuration. + * Only those `PathNode`s that are reachable from a source are generated. + */ +class PathNode extends TPathNode { + /** Gets a textual representation of this element. */ + string toString() { none() } + + /** + * Gets a textual representation of this element, including a textual + * representation of the call context. + */ + string toStringWithContext() { none() } + + /** + * Holds if this element is at the specified location. + * The location spans column `startcolumn` of line `startline` to + * column `endcolumn` of line `endline` in file `filepath`. + * For more information, see + * [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html). + */ + predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + none() + } + + /** Gets the underlying `Node`. */ + Node getNode() { none() } + + /** Gets the associated configuration. */ + Configuration getConfiguration() { none() } + + private predicate isHidden() { + nodeIsHidden(this.getNode()) and + not this.isSource() and + not this instanceof PathNodeSink + } + + private PathNode getASuccessorIfHidden() { + this.isHidden() and + result = this.(PathNodeImpl).getASuccessorImpl() + } + + /** Gets a successor of this node, if any. */ + final PathNode getASuccessor() { + result = this.(PathNodeImpl).getASuccessorImpl().getASuccessorIfHidden*() and + not this.isHidden() and + not result.isHidden() + } + + /** Holds if this node is a source. */ + predicate isSource() { none() } +} + +abstract private class PathNodeImpl extends PathNode { + abstract PathNode getASuccessorImpl(); + + private string ppAp() { + this instanceof PathNodeSink and result = "" + or + exists(string s | s = this.(PathNodeMid).getAp().toString() | + if s = "" then result = "" else result = " " + s + ) + } + + private string ppCtx() { + this instanceof PathNodeSink and result = "" + or + result = " <" + this.(PathNodeMid).getCallContext().toString() + ">" + } + + override string toString() { result = this.getNode().toString() + ppAp() } + + override string toStringWithContext() { result = this.getNode().toString() + ppAp() + ppCtx() } + + override predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + this.getNode().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } +} + +/** Holds if `n` can reach a sink. */ +private predicate reach(PathNode n) { n instanceof PathNodeSink or reach(n.getASuccessor()) } + +/** Holds if `n1.getSucc() = n2` and `n2` can reach a sink. */ +private predicate pathSucc(PathNode n1, PathNode n2) { n1.getASuccessor() = n2 and reach(n2) } + +private predicate pathSuccPlus(PathNode n1, PathNode n2) = fastTC(pathSucc/2)(n1, n2) + +/** + * Provides the query predicates needed to include a graph in a path-problem query. + */ +module PathGraph { + /** Holds if `(a,b)` is an edge in the graph of data flow path explanations. */ + query predicate edges(PathNode a, PathNode b) { pathSucc(a, b) } + + /** Holds if `n` is a node in the graph of data flow path explanations. */ + query predicate nodes(PathNode n, string key, string val) { + reach(n) and key = "semmle.label" and val = n.toString() + } +} + +/** + * An intermediate flow graph node. This is a triple consisting of a `Node`, + * a `CallContext`, and a `Configuration`. + */ +private class PathNodeMid extends PathNodeImpl, TPathNodeMid { + Node node; + CallContext cc; + SummaryCtx sc; + AccessPath ap; + Configuration config; + + PathNodeMid() { this = TPathNodeMid(node, cc, sc, ap, config) } + + override Node getNode() { result = node } + + CallContext getCallContext() { result = cc } + + SummaryCtx getSummaryCtx() { result = sc } + + AccessPath getAp() { result = ap } + + override Configuration getConfiguration() { result = config } + + private PathNodeMid getSuccMid() { + pathStep(this, result.getNode(), result.getCallContext(), result.getSummaryCtx(), result.getAp()) and + result.getConfiguration() = unbind(this.getConfiguration()) + } + + override PathNodeImpl getASuccessorImpl() { + // an intermediate step to another intermediate node + result = getSuccMid() + or + // a final step to a sink via zero steps means we merge the last two steps to prevent trivial-looking edges + exists(PathNodeMid mid, PathNodeSink sink | + mid = getSuccMid() and + mid.getNode() = sink.getNode() and + mid.getAp() instanceof AccessPathNil and + sink.getConfiguration() = unbind(mid.getConfiguration()) and + result = sink + ) + } + + override predicate isSource() { + config.isSource(node) and + cc instanceof CallContextAny and + sc instanceof SummaryCtxNone and + ap instanceof AccessPathNil + } +} + +/** + * A flow graph node corresponding to a sink. This is disjoint from the + * intermediate nodes in order to uniquely correspond to a given sink by + * excluding the `CallContext`. + */ +private class PathNodeSink extends PathNodeImpl, TPathNodeSink { + Node node; + Configuration config; + + PathNodeSink() { this = TPathNodeSink(node, config) } + + override Node getNode() { result = node } + + override Configuration getConfiguration() { result = config } + + override PathNode getASuccessorImpl() { none() } + + override predicate isSource() { config.isSource(node) } +} + +/** + * Holds if data may flow from `mid` to `node`. The last step in or out of + * a callable is recorded by `cc`. + */ +private predicate pathStep(PathNodeMid mid, Node node, CallContext cc, SummaryCtx sc, AccessPath ap) { + exists(AccessPath ap0, Node midnode, Configuration conf, LocalCallContext localCC | + midnode = mid.getNode() and + conf = mid.getConfiguration() and + cc = mid.getCallContext() and + sc = mid.getSummaryCtx() and + localCC = getLocalCallContext(cc, midnode.getEnclosingCallable()) and + ap0 = mid.getAp() + | + localFlowBigStep(midnode, node, true, _, conf, localCC) and + ap = ap0 + or + localFlowBigStep(midnode, node, false, ap.getFront(), conf, localCC) and + ap0 instanceof AccessPathNil + ) + or + jumpStep(mid.getNode(), node, mid.getConfiguration()) and + cc instanceof CallContextAny and + sc instanceof SummaryCtxNone and + ap = mid.getAp() + or + additionalJumpStep(mid.getNode(), node, mid.getConfiguration()) and + cc instanceof CallContextAny and + sc instanceof SummaryCtxNone and + mid.getAp() instanceof AccessPathNil and + ap = TNil(getNodeType(node)) + or + exists(TypedContent tc | pathStoreStep(mid, node, pop(tc, ap), tc, cc)) and + sc = mid.getSummaryCtx() + or + exists(TypedContent tc | pathReadStep(mid, node, push(tc, ap), tc, cc)) and + sc = mid.getSummaryCtx() + or + pathIntoCallable(mid, node, _, cc, sc, _) and ap = mid.getAp() + or + pathOutOfCallable(mid, node, cc) and ap = mid.getAp() and sc instanceof SummaryCtxNone + or + pathThroughCallable(mid, node, cc, ap) and sc = mid.getSummaryCtx() +} + +pragma[nomagic] +private predicate readCand(Node node1, TypedContent tc, Node node2, Configuration config) { + readCandFwd(node1, tc, _, node2, config) and + flow(node2, config) +} + +pragma[nomagic] +private predicate pathReadStep( + PathNodeMid mid, Node node, AccessPath ap0, TypedContent tc, CallContext cc +) { + ap0 = mid.getAp() and + readCand(mid.getNode(), tc, node, mid.getConfiguration()) and + cc = mid.getCallContext() +} + +pragma[nomagic] +private predicate storeCand(Node node1, TypedContent tc, Node node2, Configuration config) { + storeCand2(node1, tc, node2, _, config) and + flow(node2, config) +} + +pragma[nomagic] +private predicate pathStoreStep( + PathNodeMid mid, Node node, AccessPath ap0, TypedContent tc, CallContext cc +) { + ap0 = mid.getAp() and + storeCand(mid.getNode(), tc, node, mid.getConfiguration()) and + cc = mid.getCallContext() +} + +private predicate pathOutOfCallable0( + PathNodeMid mid, ReturnPosition pos, CallContext innercc, AccessPath ap, Configuration config +) { + pos = getReturnPosition(mid.getNode()) and + innercc = mid.getCallContext() and + not innercc instanceof CallContextCall and + ap = mid.getAp() and + config = mid.getConfiguration() +} + +pragma[nomagic] +private predicate pathOutOfCallable1( + PathNodeMid mid, DataFlowCall call, ReturnKindExt kind, CallContext cc, AccessPath ap, + Configuration config +) { + exists(ReturnPosition pos, DataFlowCallable c, CallContext innercc | + pathOutOfCallable0(mid, pos, innercc, ap, config) and + c = pos.getCallable() and + kind = pos.getKind() and + resolveReturn(innercc, c, call) + | + if reducedViableImplInReturn(c, call) then cc = TReturn(c, call) else cc = TAnyCallContext() + ) +} + +pragma[noinline] +private Node getAnOutNodeFlow( + ReturnKindExt kind, DataFlowCall call, AccessPath ap, Configuration config +) { + result = kind.getAnOutNode(call) and + flow(result, _, _, ap, config) +} + +/** + * Holds if data may flow from `mid` to `out`. The last step of this path + * is a return from a callable and is recorded by `cc`, if needed. + */ +pragma[noinline] +private predicate pathOutOfCallable(PathNodeMid mid, Node out, CallContext cc) { + exists(ReturnKindExt kind, DataFlowCall call, AccessPath ap, Configuration config | + pathOutOfCallable1(mid, call, kind, cc, ap, config) + | + out = getAnOutNodeFlow(kind, call, ap, config) + ) +} + +/** + * Holds if data may flow from `mid` to the `i`th argument of `call` in `cc`. + */ +pragma[noinline] +private predicate pathIntoArg( + PathNodeMid mid, int i, CallContext cc, DataFlowCall call, AccessPath ap +) { + exists(ArgumentNode arg | + arg = mid.getNode() and + cc = mid.getCallContext() and + arg.argumentOf(call, i) and + ap = mid.getAp() + ) +} + +pragma[noinline] +private predicate parameterCand( + DataFlowCallable callable, int i, AccessPath ap, Configuration config +) { + exists(ParameterNode p | + flow(p, _, _, ap, config) and + p.isParameterOf(callable, i) + ) +} + +pragma[nomagic] +private predicate pathIntoCallable0( + PathNodeMid mid, DataFlowCallable callable, int i, CallContext outercc, DataFlowCall call, + AccessPath ap +) { + pathIntoArg(mid, i, outercc, call, ap) and + callable = resolveCall(call, outercc) and + parameterCand(callable, any(int j | j <= i and j >= i), ap, mid.getConfiguration()) +} + +/** + * Holds if data may flow from `mid` to `p` through `call`. The contexts + * before and after entering the callable are `outercc` and `innercc`, + * respectively. + */ +private predicate pathIntoCallable( + PathNodeMid mid, ParameterNode p, CallContext outercc, CallContextCall innercc, SummaryCtx sc, + DataFlowCall call +) { + exists(int i, DataFlowCallable callable, AccessPath ap | + pathIntoCallable0(mid, callable, i, outercc, call, ap) and + p.isParameterOf(callable, i) and + ( + sc = TSummaryCtxSome(p, ap) + or + not exists(TSummaryCtxSome(p, ap)) and + sc = TSummaryCtxNone() + ) + | + if recordDataFlowCallSite(call, callable) + then innercc = TSpecificCall(call) + else innercc = TSomeCall() + ) +} + +/** Holds if data may flow from a parameter given by `sc` to a return of kind `kind`. */ +pragma[nomagic] +private predicate paramFlowsThrough( + ReturnKindExt kind, CallContextCall cc, SummaryCtxSome sc, AccessPath ap, Configuration config +) { + exists(PathNodeMid mid, ReturnNodeExt ret, int pos | + mid.getNode() = ret and + kind = ret.getKind() and + cc = mid.getCallContext() and + sc = mid.getSummaryCtx() and + config = mid.getConfiguration() and + ap = mid.getAp() and + pos = sc.getParameterPos() and + not kind.(ParamUpdateReturnKind).getPosition() = pos + ) +} + +pragma[nomagic] +private predicate pathThroughCallable0( + DataFlowCall call, PathNodeMid mid, ReturnKindExt kind, CallContext cc, AccessPath ap +) { + exists(CallContext innercc, SummaryCtx sc | + pathIntoCallable(mid, _, cc, innercc, sc, call) and + paramFlowsThrough(kind, innercc, sc, ap, unbind(mid.getConfiguration())) + ) +} + +/** + * Holds if data may flow from `mid` through a callable to the node `out`. + * The context `cc` is restored to its value prior to entering the callable. + */ +pragma[noinline] +private predicate pathThroughCallable(PathNodeMid mid, Node out, CallContext cc, AccessPath ap) { + exists(DataFlowCall call, ReturnKindExt kind | + pathThroughCallable0(call, mid, kind, cc, ap) and + out = getAnOutNodeFlow(kind, call, ap, mid.getConfiguration()) + ) +} + +/** + * Holds if data can flow (inter-procedurally) from `source` to `sink`. + * + * Will only have results if `configuration` has non-empty sources and + * sinks. + */ +private predicate flowsTo( + PathNode flowsource, PathNodeSink flowsink, Node source, Node sink, Configuration configuration +) { + flowsource.isSource() and + flowsource.getConfiguration() = configuration and + flowsource.getNode() = source and + (flowsource = flowsink or pathSuccPlus(flowsource, flowsink)) and + flowsink.getNode() = sink +} + +/** + * Holds if data can flow (inter-procedurally) from `source` to `sink`. + * + * Will only have results if `configuration` has non-empty sources and + * sinks. + */ +predicate flowsTo(Node source, Node sink, Configuration configuration) { + flowsTo(_, _, source, sink, configuration) +} + +private module FlowExploration { + private predicate callableStep(DataFlowCallable c1, DataFlowCallable c2, Configuration config) { + exists(Node node1, Node node2 | + jumpStep(node1, node2, config) + or + additionalJumpStep(node1, node2, config) + or + // flow into callable + viableParamArg(_, node2, node1) + or + // flow out of a callable + viableReturnPosOut(_, getReturnPosition(node1), node2) + | + c1 = node1.getEnclosingCallable() and + c2 = node2.getEnclosingCallable() and + c1 != c2 + ) + } + + private predicate interestingCallableSrc(DataFlowCallable c, Configuration config) { + exists(Node n | config.isSource(n) and c = n.getEnclosingCallable()) + or + exists(DataFlowCallable mid | + interestingCallableSrc(mid, config) and callableStep(mid, c, config) + ) + } + + private newtype TCallableExt = + TCallable(DataFlowCallable c, Configuration config) { interestingCallableSrc(c, config) } or + TCallableSrc() + + private predicate callableExtSrc(TCallableSrc src) { any() } + + private predicate callableExtStepFwd(TCallableExt ce1, TCallableExt ce2) { + exists(DataFlowCallable c1, DataFlowCallable c2, Configuration config | + callableStep(c1, c2, config) and + ce1 = TCallable(c1, config) and + ce2 = TCallable(c2, unbind(config)) + ) + or + exists(Node n, Configuration config | + ce1 = TCallableSrc() and + config.isSource(n) and + ce2 = TCallable(n.getEnclosingCallable(), config) + ) + } + + private int distSrcExt(TCallableExt c) = + shortestDistances(callableExtSrc/1, callableExtStepFwd/2)(_, c, result) + + private int distSrc(DataFlowCallable c, Configuration config) { + result = distSrcExt(TCallable(c, config)) - 1 + } + + private newtype TPartialAccessPath = + TPartialNil(DataFlowType t) or + TPartialCons(TypedContent tc, int len) { len in [1 .. accessPathLimit()] } + + /** + * Conceptually a list of `TypedContent`s followed by a `Type`, but only the first + * element of the list and its length are tracked. If data flows from a source to + * a given node with a given `AccessPath`, this indicates the sequence of + * dereference operations needed to get from the value in the node to the + * tracked object. The final type indicates the type of the tracked object. + */ + private class PartialAccessPath extends TPartialAccessPath { + abstract string toString(); + + TypedContent getHead() { this = TPartialCons(result, _) } + + int len() { + this = TPartialNil(_) and result = 0 + or + this = TPartialCons(_, result) + } + + DataFlowType getType() { + this = TPartialNil(result) + or + exists(TypedContent head | this = TPartialCons(head, _) | result = head.getContainerType()) + } + + abstract AccessPathFront getFront(); + } + + private class PartialAccessPathNil extends PartialAccessPath, TPartialNil { + override string toString() { + exists(DataFlowType t | this = TPartialNil(t) | result = concat(": " + ppReprType(t))) + } + + override AccessPathFront getFront() { + exists(DataFlowType t | this = TPartialNil(t) | result = TFrontNil(t)) + } + } + + private class PartialAccessPathCons extends PartialAccessPath, TPartialCons { + override string toString() { + exists(TypedContent tc, int len | this = TPartialCons(tc, len) | + if len = 1 + then result = "[" + tc.toString() + "]" + else result = "[" + tc.toString() + ", ... (" + len.toString() + ")]" + ) + } + + override AccessPathFront getFront() { + exists(TypedContent tc | this = TPartialCons(tc, _) | result = TFrontHead(tc)) + } + } + + private newtype TSummaryCtx1 = + TSummaryCtx1None() or + TSummaryCtx1Param(ParameterNode p) + + private newtype TSummaryCtx2 = + TSummaryCtx2None() or + TSummaryCtx2Some(PartialAccessPath ap) + + private newtype TPartialPathNode = + TPartialPathNodeMk( + Node node, CallContext cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2, PartialAccessPath ap, + Configuration config + ) { + config.isSource(node) and + cc instanceof CallContextAny and + sc1 = TSummaryCtx1None() and + sc2 = TSummaryCtx2None() and + ap = TPartialNil(getNodeType(node)) and + not fullBarrier(node, config) and + exists(config.explorationLimit()) + or + partialPathNodeMk0(node, cc, sc1, sc2, ap, config) and + distSrc(node.getEnclosingCallable(), config) <= config.explorationLimit() + } + + pragma[nomagic] + private predicate partialPathNodeMk0( + Node node, CallContext cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2, PartialAccessPath ap, + Configuration config + ) { + exists(PartialPathNode mid | + partialPathStep(mid, node, cc, sc1, sc2, ap, config) and + not fullBarrier(node, config) and + if node instanceof CastingNode + then compatibleTypes(getNodeType(node), ap.getType()) + else any() + ) + } + + /** + * A `Node` augmented with a call context, an access path, and a configuration. + */ + class PartialPathNode extends TPartialPathNode { + /** Gets a textual representation of this element. */ + string toString() { result = this.getNode().toString() + this.ppAp() } + + /** + * Gets a textual representation of this element, including a textual + * representation of the call context. + */ + string toStringWithContext() { result = this.getNode().toString() + this.ppAp() + this.ppCtx() } + + /** + * Holds if this element is at the specified location. + * The location spans column `startcolumn` of line `startline` to + * column `endcolumn` of line `endline` in file `filepath`. + * For more information, see + * [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html). + */ + predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + this.getNode().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } + + /** Gets the underlying `Node`. */ + Node getNode() { none() } + + /** Gets the associated configuration. */ + Configuration getConfiguration() { none() } + + /** Gets a successor of this node, if any. */ + PartialPathNode getASuccessor() { none() } + + /** + * Gets the approximate distance to the nearest source measured in number + * of interprocedural steps. + */ + int getSourceDistance() { + result = distSrc(this.getNode().getEnclosingCallable(), this.getConfiguration()) + } + + private string ppAp() { + exists(string s | s = this.(PartialPathNodePriv).getAp().toString() | + if s = "" then result = "" else result = " " + s + ) + } + + private string ppCtx() { + result = " <" + this.(PartialPathNodePriv).getCallContext().toString() + ">" + } + } + + /** + * Provides the query predicates needed to include a graph in a path-problem query. + */ + module PartialPathGraph { + /** Holds if `(a,b)` is an edge in the graph of data flow path explanations. */ + query predicate edges(PartialPathNode a, PartialPathNode b) { a.getASuccessor() = b } + } + + private class PartialPathNodePriv extends PartialPathNode { + Node node; + CallContext cc; + TSummaryCtx1 sc1; + TSummaryCtx2 sc2; + PartialAccessPath ap; + Configuration config; + + PartialPathNodePriv() { this = TPartialPathNodeMk(node, cc, sc1, sc2, ap, config) } + + override Node getNode() { result = node } + + CallContext getCallContext() { result = cc } + + TSummaryCtx1 getSummaryCtx1() { result = sc1 } + + TSummaryCtx2 getSummaryCtx2() { result = sc2 } + + PartialAccessPath getAp() { result = ap } + + override Configuration getConfiguration() { result = config } + + override PartialPathNodePriv getASuccessor() { + partialPathStep(this, result.getNode(), result.getCallContext(), result.getSummaryCtx1(), + result.getSummaryCtx2(), result.getAp(), result.getConfiguration()) + } + } + + private predicate partialPathStep( + PartialPathNodePriv mid, Node node, CallContext cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2, + PartialAccessPath ap, Configuration config + ) { + not isUnreachableInCall(node, cc.(CallContextSpecificCall).getCall()) and + ( + localFlowStep(mid.getNode(), node, config) and + cc = mid.getCallContext() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + ap = mid.getAp() and + config = mid.getConfiguration() + or + additionalLocalFlowStep(mid.getNode(), node, config) and + cc = mid.getCallContext() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + mid.getAp() instanceof PartialAccessPathNil and + ap = TPartialNil(getNodeType(node)) and + config = mid.getConfiguration() + ) + or + jumpStep(mid.getNode(), node, config) and + cc instanceof CallContextAny and + sc1 = TSummaryCtx1None() and + sc2 = TSummaryCtx2None() and + ap = mid.getAp() and + config = mid.getConfiguration() + or + additionalJumpStep(mid.getNode(), node, config) and + cc instanceof CallContextAny and + sc1 = TSummaryCtx1None() and + sc2 = TSummaryCtx2None() and + mid.getAp() instanceof PartialAccessPathNil and + ap = TPartialNil(getNodeType(node)) and + config = mid.getConfiguration() + or + partialPathStoreStep(mid, _, _, node, ap) and + cc = mid.getCallContext() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + config = mid.getConfiguration() + or + exists(PartialAccessPath ap0, TypedContent tc | + partialPathReadStep(mid, ap0, tc, node, cc, config) and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + apConsFwd(ap, tc, ap0, config) and + compatibleTypes(ap.getType(), getNodeType(node)) + ) + or + partialPathIntoCallable(mid, node, _, cc, sc1, sc2, _, ap, config) + or + partialPathOutOfCallable(mid, node, cc, ap, config) and + sc1 = TSummaryCtx1None() and + sc2 = TSummaryCtx2None() + or + partialPathThroughCallable(mid, node, cc, ap, config) and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() + } + + bindingset[result, i] + private int unbindInt(int i) { i <= result and i >= result } + + pragma[inline] + private predicate partialPathStoreStep( + PartialPathNodePriv mid, PartialAccessPath ap1, TypedContent tc, Node node, + PartialAccessPath ap2 + ) { + exists(Node midNode, DataFlowType contentType | + midNode = mid.getNode() and + ap1 = mid.getAp() and + store(midNode, tc, node, contentType) and + ap2.getHead() = tc and + ap2.len() = unbindInt(ap1.len() + 1) and + compatibleTypes(ap1.getType(), contentType) + ) + } + + pragma[nomagic] + private predicate apConsFwd( + PartialAccessPath ap1, TypedContent tc, PartialAccessPath ap2, Configuration config + ) { + exists(PartialPathNodePriv mid | + partialPathStoreStep(mid, ap1, tc, _, ap2) and + config = mid.getConfiguration() + ) + } + + pragma[nomagic] + private predicate partialPathReadStep( + PartialPathNodePriv mid, PartialAccessPath ap, TypedContent tc, Node node, CallContext cc, + Configuration config + ) { + exists(Node midNode | + midNode = mid.getNode() and + ap = mid.getAp() and + read(midNode, tc.getContent(), node) and + ap.getHead() = tc and + config = mid.getConfiguration() and + cc = mid.getCallContext() + ) + } + + private predicate partialPathOutOfCallable0( + PartialPathNodePriv mid, ReturnPosition pos, CallContext innercc, PartialAccessPath ap, + Configuration config + ) { + pos = getReturnPosition(mid.getNode()) and + innercc = mid.getCallContext() and + not innercc instanceof CallContextCall and + ap = mid.getAp() and + config = mid.getConfiguration() + } + + pragma[nomagic] + private predicate partialPathOutOfCallable1( + PartialPathNodePriv mid, DataFlowCall call, ReturnKindExt kind, CallContext cc, + PartialAccessPath ap, Configuration config + ) { + exists(ReturnPosition pos, DataFlowCallable c, CallContext innercc | + partialPathOutOfCallable0(mid, pos, innercc, ap, config) and + c = pos.getCallable() and + kind = pos.getKind() and + resolveReturn(innercc, c, call) + | + if reducedViableImplInReturn(c, call) then cc = TReturn(c, call) else cc = TAnyCallContext() + ) + } + + private predicate partialPathOutOfCallable( + PartialPathNodePriv mid, Node out, CallContext cc, PartialAccessPath ap, Configuration config + ) { + exists(ReturnKindExt kind, DataFlowCall call | + partialPathOutOfCallable1(mid, call, kind, cc, ap, config) + | + out = kind.getAnOutNode(call) + ) + } + + pragma[noinline] + private predicate partialPathIntoArg( + PartialPathNodePriv mid, int i, CallContext cc, DataFlowCall call, PartialAccessPath ap, + Configuration config + ) { + exists(ArgumentNode arg | + arg = mid.getNode() and + cc = mid.getCallContext() and + arg.argumentOf(call, i) and + ap = mid.getAp() and + config = mid.getConfiguration() + ) + } + + pragma[nomagic] + private predicate partialPathIntoCallable0( + PartialPathNodePriv mid, DataFlowCallable callable, int i, CallContext outercc, + DataFlowCall call, PartialAccessPath ap, Configuration config + ) { + partialPathIntoArg(mid, i, outercc, call, ap, config) and + callable = resolveCall(call, outercc) + } + + private predicate partialPathIntoCallable( + PartialPathNodePriv mid, ParameterNode p, CallContext outercc, CallContextCall innercc, + TSummaryCtx1 sc1, TSummaryCtx2 sc2, DataFlowCall call, PartialAccessPath ap, + Configuration config + ) { + exists(int i, DataFlowCallable callable | + partialPathIntoCallable0(mid, callable, i, outercc, call, ap, config) and + p.isParameterOf(callable, i) and + sc1 = TSummaryCtx1Param(p) and + sc2 = TSummaryCtx2Some(ap) + | + if recordDataFlowCallSite(call, callable) + then innercc = TSpecificCall(call) + else innercc = TSomeCall() + ) + } + + pragma[nomagic] + private predicate paramFlowsThroughInPartialPath( + ReturnKindExt kind, CallContextCall cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2, + PartialAccessPath ap, Configuration config + ) { + exists(PartialPathNodePriv mid, ReturnNodeExt ret | + mid.getNode() = ret and + kind = ret.getKind() and + cc = mid.getCallContext() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + config = mid.getConfiguration() and + ap = mid.getAp() + ) + } + + pragma[noinline] + private predicate partialPathThroughCallable0( + DataFlowCall call, PartialPathNodePriv mid, ReturnKindExt kind, CallContext cc, + PartialAccessPath ap, Configuration config + ) { + exists(ParameterNode p, CallContext innercc, TSummaryCtx1 sc1, TSummaryCtx2 sc2 | + partialPathIntoCallable(mid, p, cc, innercc, sc1, sc2, call, _, config) and + paramFlowsThroughInPartialPath(kind, innercc, sc1, sc2, ap, config) + ) + } + + private predicate partialPathThroughCallable( + PartialPathNodePriv mid, Node out, CallContext cc, PartialAccessPath ap, Configuration config + ) { + exists(DataFlowCall call, ReturnKindExt kind | + partialPathThroughCallable0(call, mid, kind, cc, ap, config) and + out = kind.getAnOutNode(call) + ) + } +} + +import FlowExploration + +private predicate partialFlow( + PartialPathNode source, PartialPathNode node, Configuration configuration +) { + source.getConfiguration() = configuration and + configuration.isSource(source.getNode()) and + node = source.getASuccessor+() +} diff --git a/python/ql/src/experimental/dataflow/internal/DataFlowImpl2.qll b/python/ql/src/experimental/dataflow/internal/DataFlowImpl2.qll new file mode 100644 index 000000000000..5042dce683f6 --- /dev/null +++ b/python/ql/src/experimental/dataflow/internal/DataFlowImpl2.qll @@ -0,0 +1,2985 @@ +/** + * Provides an implementation of global (interprocedural) data flow. This file + * re-exports the local (intraprocedural) data flow analysis from + * `DataFlowImplSpecific::Public` and adds a global analysis, mainly exposed + * through the `Configuration` class. This file exists in several identical + * copies, allowing queries to use multiple `Configuration` classes that depend + * on each other without introducing mutual recursion among those configurations. + */ + +private import DataFlowImplCommon +private import DataFlowImplSpecific::Private +import DataFlowImplSpecific::Public + +/** + * A configuration of interprocedural data flow analysis. This defines + * sources, sinks, and any other configurable aspect of the analysis. Each + * use of the global data flow library must define its own unique extension + * of this abstract class. To create a configuration, extend this class with + * a subclass whose characteristic predicate is a unique singleton string. + * For example, write + * + * ```ql + * class MyAnalysisConfiguration extends DataFlow::Configuration { + * MyAnalysisConfiguration() { this = "MyAnalysisConfiguration" } + * // Override `isSource` and `isSink`. + * // Optionally override `isBarrier`. + * // Optionally override `isAdditionalFlowStep`. + * } + * ``` + * Conceptually, this defines a graph where the nodes are `DataFlow::Node`s and + * the edges are those data-flow steps that preserve the value of the node + * along with any additional edges defined by `isAdditionalFlowStep`. + * Specifying nodes in `isBarrier` will remove those nodes from the graph, and + * specifying nodes in `isBarrierIn` and/or `isBarrierOut` will remove in-going + * and/or out-going edges from those nodes, respectively. + * + * Then, to query whether there is flow between some `source` and `sink`, + * write + * + * ```ql + * exists(MyAnalysisConfiguration cfg | cfg.hasFlow(source, sink)) + * ``` + * + * Multiple configurations can coexist, but two classes extending + * `DataFlow::Configuration` should never depend on each other. One of them + * should instead depend on a `DataFlow2::Configuration`, a + * `DataFlow3::Configuration`, or a `DataFlow4::Configuration`. + */ +abstract class Configuration extends string { + bindingset[this] + Configuration() { any() } + + /** + * Holds if `source` is a relevant data flow source. + */ + abstract predicate isSource(Node source); + + /** + * Holds if `sink` is a relevant data flow sink. + */ + abstract predicate isSink(Node sink); + + /** + * Holds if data flow through `node` is prohibited. This completely removes + * `node` from the data flow graph. + */ + predicate isBarrier(Node node) { none() } + + /** Holds if data flow into `node` is prohibited. */ + predicate isBarrierIn(Node node) { none() } + + /** Holds if data flow out of `node` is prohibited. */ + predicate isBarrierOut(Node node) { none() } + + /** Holds if data flow through nodes guarded by `guard` is prohibited. */ + predicate isBarrierGuard(BarrierGuard guard) { none() } + + /** + * Holds if the additional flow step from `node1` to `node2` must be taken + * into account in the analysis. + */ + predicate isAdditionalFlowStep(Node node1, Node node2) { none() } + + /** + * Gets the virtual dispatch branching limit when calculating field flow. + * This can be overridden to a smaller value to improve performance (a + * value of 0 disables field flow), or a larger value to get more results. + */ + int fieldFlowBranchLimit() { result = 2 } + + /** + * Holds if data may flow from `source` to `sink` for this configuration. + */ + predicate hasFlow(Node source, Node sink) { flowsTo(source, sink, this) } + + /** + * Holds if data may flow from `source` to `sink` for this configuration. + * + * The corresponding paths are generated from the end-points and the graph + * included in the module `PathGraph`. + */ + predicate hasFlowPath(PathNode source, PathNode sink) { flowsTo(source, sink, _, _, this) } + + /** + * Holds if data may flow from some source to `sink` for this configuration. + */ + predicate hasFlowTo(Node sink) { hasFlow(_, sink) } + + /** + * Holds if data may flow from some source to `sink` for this configuration. + */ + predicate hasFlowToExpr(DataFlowExpr sink) { hasFlowTo(exprNode(sink)) } + + /** + * Gets the exploration limit for `hasPartialFlow` measured in approximate + * number of interprocedural steps. + */ + int explorationLimit() { none() } + + /** + * Holds if there is a partial data flow path from `source` to `node`. The + * approximate distance between `node` and the closest source is `dist` and + * is restricted to be less than or equal to `explorationLimit()`. This + * predicate completely disregards sink definitions. + * + * This predicate is intended for dataflow exploration and debugging and may + * perform poorly if the number of sources is too big and/or the exploration + * limit is set too high without using barriers. + * + * This predicate is disabled (has no results) by default. Override + * `explorationLimit()` with a suitable number to enable this predicate. + * + * To use this in a `path-problem` query, import the module `PartialPathGraph`. + */ + final predicate hasPartialFlow(PartialPathNode source, PartialPathNode node, int dist) { + partialFlow(source, node, this) and + dist = node.getSourceDistance() + } +} + +/** + * This class exists to prevent mutual recursion between the user-overridden + * member predicates of `Configuration` and the rest of the data-flow library. + * Good performance cannot be guaranteed in the presence of such recursion, so + * it should be replaced by using more than one copy of the data flow library. + */ +abstract private class ConfigurationRecursionPrevention extends Configuration { + bindingset[this] + ConfigurationRecursionPrevention() { any() } + + override predicate hasFlow(Node source, Node sink) { + strictcount(Node n | this.isSource(n)) < 0 + or + strictcount(Node n | this.isSink(n)) < 0 + or + strictcount(Node n1, Node n2 | this.isAdditionalFlowStep(n1, n2)) < 0 + or + super.hasFlow(source, sink) + } +} + +private predicate inBarrier(Node node, Configuration config) { + config.isBarrierIn(node) and + config.isSource(node) +} + +private predicate outBarrier(Node node, Configuration config) { + config.isBarrierOut(node) and + config.isSink(node) +} + +private predicate fullBarrier(Node node, Configuration config) { + config.isBarrier(node) + or + config.isBarrierIn(node) and + not config.isSource(node) + or + config.isBarrierOut(node) and + not config.isSink(node) + or + exists(BarrierGuard g | + config.isBarrierGuard(g) and + node = g.getAGuardedNode() + ) +} + +private class AdditionalFlowStepSource extends Node { + AdditionalFlowStepSource() { any(Configuration c).isAdditionalFlowStep(this, _) } +} + +pragma[noinline] +private predicate isAdditionalFlowStep( + AdditionalFlowStepSource node1, Node node2, DataFlowCallable callable1, Configuration config +) { + config.isAdditionalFlowStep(node1, node2) and + callable1 = node1.getEnclosingCallable() +} + +/** + * Holds if data can flow in one local step from `node1` to `node2`. + */ +private predicate localFlowStep(Node node1, Node node2, Configuration config) { + simpleLocalFlowStep(node1, node2) and + not outBarrier(node1, config) and + not inBarrier(node2, config) and + not fullBarrier(node1, config) and + not fullBarrier(node2, config) +} + +/** + * Holds if the additional step from `node1` to `node2` does not jump between callables. + */ +private predicate additionalLocalFlowStep(Node node1, Node node2, Configuration config) { + isAdditionalFlowStep(node1, node2, node2.getEnclosingCallable(), config) and + not outBarrier(node1, config) and + not inBarrier(node2, config) and + not fullBarrier(node1, config) and + not fullBarrier(node2, config) +} + +/** + * Holds if data can flow from `node1` to `node2` in a way that discards call contexts. + */ +private predicate jumpStep(Node node1, Node node2, Configuration config) { + jumpStep(node1, node2) and + not outBarrier(node1, config) and + not inBarrier(node2, config) and + not fullBarrier(node1, config) and + not fullBarrier(node2, config) +} + +/** + * Holds if the additional step from `node1` to `node2` jumps between callables. + */ +private predicate additionalJumpStep(Node node1, Node node2, Configuration config) { + exists(DataFlowCallable callable1 | + isAdditionalFlowStep(node1, node2, callable1, config) and + node2.getEnclosingCallable() != callable1 and + not outBarrier(node1, config) and + not inBarrier(node2, config) and + not fullBarrier(node1, config) and + not fullBarrier(node2, config) + ) +} + +/** + * Holds if field flow should be used for the given configuration. + */ +private predicate useFieldFlow(Configuration config) { config.fieldFlowBranchLimit() >= 1 } + +/** + * Holds if `node` is reachable from a source in the configuration `config`. + * + * The Boolean `fromArg` records whether the node is reached through an + * argument in a call. + */ +private predicate nodeCandFwd1(Node node, boolean fromArg, Configuration config) { + not fullBarrier(node, config) and + ( + config.isSource(node) and + fromArg = false + or + exists(Node mid | + nodeCandFwd1(mid, fromArg, config) and + localFlowStep(mid, node, config) + ) + or + exists(Node mid | + nodeCandFwd1(mid, fromArg, config) and + additionalLocalFlowStep(mid, node, config) + ) + or + exists(Node mid | + nodeCandFwd1(mid, config) and + jumpStep(mid, node, config) and + fromArg = false + ) + or + exists(Node mid | + nodeCandFwd1(mid, config) and + additionalJumpStep(mid, node, config) and + fromArg = false + ) + or + // store + exists(Node mid | + useFieldFlow(config) and + nodeCandFwd1(mid, fromArg, config) and + store(mid, _, node, _) and + not outBarrier(mid, config) + ) + or + // read + exists(Content c | + nodeCandFwd1Read(c, node, fromArg, config) and + nodeCandFwd1IsStored(c, config) and + not inBarrier(node, config) + ) + or + // flow into a callable + exists(Node arg | + nodeCandFwd1(arg, config) and + viableParamArg(_, node, arg) and + fromArg = true + ) + or + // flow out of a callable + exists(DataFlowCall call | + nodeCandFwd1Out(call, node, false, config) and + fromArg = false + or + nodeCandFwd1OutFromArg(call, node, config) and + nodeCandFwd1IsEntered(call, fromArg, config) + ) + ) +} + +private predicate nodeCandFwd1(Node node, Configuration config) { nodeCandFwd1(node, _, config) } + +pragma[nomagic] +private predicate nodeCandFwd1Read(Content c, Node node, boolean fromArg, Configuration config) { + exists(Node mid | + nodeCandFwd1(mid, fromArg, config) and + read(mid, c, node) + ) +} + +/** + * Holds if `c` is the target of a store in the flow covered by `nodeCandFwd1`. + */ +pragma[nomagic] +private predicate nodeCandFwd1IsStored(Content c, Configuration config) { + exists(Node mid, Node node, TypedContent tc | + not fullBarrier(node, config) and + useFieldFlow(config) and + nodeCandFwd1(mid, config) and + store(mid, tc, node, _) and + c = tc.getContent() + ) +} + +pragma[nomagic] +private predicate nodeCandFwd1ReturnPosition( + ReturnPosition pos, boolean fromArg, Configuration config +) { + exists(ReturnNodeExt ret | + nodeCandFwd1(ret, fromArg, config) and + getReturnPosition(ret) = pos + ) +} + +pragma[nomagic] +private predicate nodeCandFwd1Out(DataFlowCall call, Node out, boolean fromArg, Configuration config) { + exists(ReturnPosition pos | + nodeCandFwd1ReturnPosition(pos, fromArg, config) and + viableReturnPosOut(call, pos, out) + ) +} + +pragma[nomagic] +private predicate nodeCandFwd1OutFromArg(DataFlowCall call, Node node, Configuration config) { + nodeCandFwd1Out(call, node, true, config) +} + +/** + * Holds if an argument to `call` is reached in the flow covered by `nodeCandFwd1`. + */ +pragma[nomagic] +private predicate nodeCandFwd1IsEntered(DataFlowCall call, boolean fromArg, Configuration config) { + exists(ArgumentNode arg | + nodeCandFwd1(arg, fromArg, config) and + viableParamArg(call, _, arg) + ) +} + +bindingset[result, b] +private boolean unbindBool(boolean b) { result != b.booleanNot() } + +/** + * Holds if `node` is part of a path from a source to a sink in the + * configuration `config`. + * + * The Boolean `toReturn` records whether the node must be returned from + * the enclosing callable in order to reach a sink. + */ +pragma[nomagic] +private predicate nodeCand1(Node node, boolean toReturn, Configuration config) { + nodeCand1_0(node, toReturn, config) and + nodeCandFwd1(node, config) +} + +pragma[nomagic] +private predicate nodeCand1_0(Node node, boolean toReturn, Configuration config) { + nodeCandFwd1(node, config) and + config.isSink(node) and + toReturn = false + or + exists(Node mid | + localFlowStep(node, mid, config) and + nodeCand1(mid, toReturn, config) + ) + or + exists(Node mid | + additionalLocalFlowStep(node, mid, config) and + nodeCand1(mid, toReturn, config) + ) + or + exists(Node mid | + jumpStep(node, mid, config) and + nodeCand1(mid, _, config) and + toReturn = false + ) + or + exists(Node mid | + additionalJumpStep(node, mid, config) and + nodeCand1(mid, _, config) and + toReturn = false + ) + or + // store + exists(Content c | + nodeCand1Store(c, node, toReturn, config) and + nodeCand1IsRead(c, config) + ) + or + // read + exists(Node mid, Content c | + read(node, c, mid) and + nodeCandFwd1IsStored(c, unbind(config)) and + nodeCand1(mid, toReturn, config) + ) + or + // flow into a callable + exists(DataFlowCall call | + nodeCand1In(call, node, false, config) and + toReturn = false + or + nodeCand1InToReturn(call, node, config) and + nodeCand1IsReturned(call, toReturn, config) + ) + or + // flow out of a callable + exists(ReturnPosition pos | + nodeCand1Out(pos, config) and + getReturnPosition(node) = pos and + toReturn = true + ) +} + +/** + * Holds if `c` is the target of a read in the flow covered by `nodeCand1`. + */ +pragma[nomagic] +private predicate nodeCand1IsRead(Content c, Configuration config) { + exists(Node mid, Node node | + useFieldFlow(config) and + nodeCandFwd1(node, unbind(config)) and + read(node, c, mid) and + nodeCandFwd1IsStored(c, unbind(config)) and + nodeCand1(mid, _, config) + ) +} + +pragma[nomagic] +private predicate nodeCand1Store(Content c, Node node, boolean toReturn, Configuration config) { + exists(Node mid, TypedContent tc | + nodeCand1(mid, toReturn, config) and + nodeCandFwd1IsStored(c, unbind(config)) and + store(node, tc, mid, _) and + c = tc.getContent() + ) +} + +/** + * Holds if `c` is the target of both a read and a store in the flow covered + * by `nodeCand1`. + */ +private predicate nodeCand1IsReadAndStored(Content c, Configuration conf) { + nodeCand1IsRead(c, conf) and + nodeCand1Store(c, _, _, conf) +} + +pragma[nomagic] +private predicate viableReturnPosOutNodeCandFwd1( + DataFlowCall call, ReturnPosition pos, Node out, Configuration config +) { + nodeCandFwd1ReturnPosition(pos, _, config) and + viableReturnPosOut(call, pos, out) +} + +pragma[nomagic] +private predicate nodeCand1Out(ReturnPosition pos, Configuration config) { + exists(DataFlowCall call, Node out | + nodeCand1(out, _, config) and + viableReturnPosOutNodeCandFwd1(call, pos, out, config) + ) +} + +pragma[nomagic] +private predicate viableParamArgNodeCandFwd1( + DataFlowCall call, ParameterNode p, ArgumentNode arg, Configuration config +) { + viableParamArg(call, p, arg) and + nodeCandFwd1(arg, config) +} + +pragma[nomagic] +private predicate nodeCand1In( + DataFlowCall call, ArgumentNode arg, boolean toReturn, Configuration config +) { + exists(ParameterNode p | + nodeCand1(p, toReturn, config) and + viableParamArgNodeCandFwd1(call, p, arg, config) + ) +} + +pragma[nomagic] +private predicate nodeCand1InToReturn(DataFlowCall call, ArgumentNode arg, Configuration config) { + nodeCand1In(call, arg, true, config) +} + +/** + * Holds if an output from `call` is reached in the flow covered by `nodeCand1`. + */ +pragma[nomagic] +private predicate nodeCand1IsReturned(DataFlowCall call, boolean toReturn, Configuration config) { + exists(Node out | + nodeCand1(out, toReturn, config) and + nodeCandFwd1OutFromArg(call, out, config) + ) +} + +pragma[nomagic] +private predicate nodeCand1(Node node, Configuration config) { nodeCand1(node, _, config) } + +private predicate throughFlowNodeCand1(Node node, Configuration config) { + nodeCand1(node, true, config) and + not fullBarrier(node, config) and + not inBarrier(node, config) and + not outBarrier(node, config) +} + +/** Holds if flow may return from `callable`. */ +pragma[nomagic] +private predicate returnFlowCallableNodeCand1( + DataFlowCallable callable, ReturnKindExt kind, Configuration config +) { + exists(ReturnNodeExt ret | + throughFlowNodeCand1(ret, config) and + callable = ret.getEnclosingCallable() and + kind = ret.getKind() + ) +} + +/** + * Holds if flow may enter through `p` and reach a return node making `p` a + * candidate for the origin of a summary. + */ +private predicate parameterThroughFlowNodeCand1(ParameterNode p, Configuration config) { + exists(ReturnKindExt kind | + throughFlowNodeCand1(p, config) and + returnFlowCallableNodeCand1(p.getEnclosingCallable(), kind, config) and + // we don't expect a parameter to return stored in itself + not exists(int pos | + kind.(ParamUpdateReturnKind).getPosition() = pos and p.isParameterOf(_, pos) + ) + ) +} + +pragma[nomagic] +private predicate storeCand1(Node n1, Content c, Node n2, Configuration config) { + exists(TypedContent tc | + nodeCand1IsReadAndStored(c, config) and + nodeCand1(n2, unbind(config)) and + store(n1, tc, n2, _) and + c = tc.getContent() + ) +} + +pragma[nomagic] +private predicate read(Node n1, Content c, Node n2, Configuration config) { + nodeCand1IsReadAndStored(c, config) and + nodeCand1(n2, unbind(config)) and + read(n1, c, n2) +} + +pragma[noinline] +private predicate localFlowStepNodeCand1(Node node1, Node node2, Configuration config) { + nodeCand1(node1, config) and + localFlowStep(node1, node2, config) +} + +pragma[noinline] +private predicate additionalLocalFlowStepNodeCand1(Node node1, Node node2, Configuration config) { + nodeCand1(node1, config) and + additionalLocalFlowStep(node1, node2, config) +} + +pragma[nomagic] +private predicate viableReturnPosOutNodeCand1( + DataFlowCall call, ReturnPosition pos, Node out, Configuration config +) { + nodeCand1(out, _, config) and + viableReturnPosOutNodeCandFwd1(call, pos, out, config) +} + +/** + * Holds if data can flow out of `call` from `ret` to `out`, either + * through a `ReturnNode` or through an argument that has been mutated, and + * that this step is part of a path from a source to a sink. + */ +pragma[nomagic] +private predicate flowOutOfCallNodeCand1( + DataFlowCall call, ReturnNodeExt ret, Node out, Configuration config +) { + viableReturnPosOutNodeCand1(call, getReturnPosition(ret), out, config) and + nodeCand1(ret, config) and + not outBarrier(ret, config) and + not inBarrier(out, config) +} + +pragma[nomagic] +private predicate viableParamArgNodeCand1( + DataFlowCall call, ParameterNode p, ArgumentNode arg, Configuration config +) { + viableParamArgNodeCandFwd1(call, p, arg, config) and + nodeCand1(arg, config) +} + +/** + * Holds if data can flow into `call` and that this step is part of a + * path from a source to a sink. + */ +pragma[nomagic] +private predicate flowIntoCallNodeCand1( + DataFlowCall call, ArgumentNode arg, ParameterNode p, Configuration config +) { + viableParamArgNodeCand1(call, p, arg, config) and + nodeCand1(p, config) and + not outBarrier(arg, config) and + not inBarrier(p, config) +} + +/** + * Gets the amount of forward branching on the origin of a cross-call path + * edge in the graph of paths between sources and sinks that ignores call + * contexts. + */ +private int branch(Node n1, Configuration conf) { + result = + strictcount(Node n | + flowOutOfCallNodeCand1(_, n1, n, conf) or flowIntoCallNodeCand1(_, n1, n, conf) + ) +} + +/** + * Gets the amount of backward branching on the target of a cross-call path + * edge in the graph of paths between sources and sinks that ignores call + * contexts. + */ +private int join(Node n2, Configuration conf) { + result = + strictcount(Node n | + flowOutOfCallNodeCand1(_, n, n2, conf) or flowIntoCallNodeCand1(_, n, n2, conf) + ) +} + +/** + * Holds if data can flow out of `call` from `ret` to `out`, either + * through a `ReturnNode` or through an argument that has been mutated, and + * that this step is part of a path from a source to a sink. The + * `allowsFieldFlow` flag indicates whether the branching is within the limit + * specified by the configuration. + */ +pragma[nomagic] +private predicate flowOutOfCallNodeCand1( + DataFlowCall call, ReturnNodeExt ret, Node out, boolean allowsFieldFlow, Configuration config +) { + flowOutOfCallNodeCand1(call, ret, out, config) and + exists(int b, int j | + b = branch(ret, config) and + j = join(out, config) and + if b.minimum(j) <= config.fieldFlowBranchLimit() + then allowsFieldFlow = true + else allowsFieldFlow = false + ) +} + +/** + * Holds if data can flow into `call` and that this step is part of a + * path from a source to a sink. The `allowsFieldFlow` flag indicates whether + * the branching is within the limit specified by the configuration. + */ +pragma[nomagic] +private predicate flowIntoCallNodeCand1( + DataFlowCall call, ArgumentNode arg, ParameterNode p, boolean allowsFieldFlow, + Configuration config +) { + flowIntoCallNodeCand1(call, arg, p, config) and + exists(int b, int j | + b = branch(arg, config) and + j = join(p, config) and + if b.minimum(j) <= config.fieldFlowBranchLimit() + then allowsFieldFlow = true + else allowsFieldFlow = false + ) +} + +/** + * Holds if `node` is reachable from a source in the configuration `config`. + * The Boolean `stored` records whether the tracked value is stored into a + * field of `node`. + * + * The Boolean `fromArg` records whether the node is reached through an + * argument in a call, and if so, `argStored` records whether the tracked + * value was stored into a field of the argument. + */ +private predicate nodeCandFwd2( + Node node, boolean fromArg, BooleanOption argStored, boolean stored, Configuration config +) { + nodeCand1(node, config) and + config.isSource(node) and + fromArg = false and + argStored = TBooleanNone() and + stored = false + or + nodeCand1(node, unbind(config)) and + ( + exists(Node mid | + nodeCandFwd2(mid, fromArg, argStored, stored, config) and + localFlowStepNodeCand1(mid, node, config) + ) + or + exists(Node mid | + nodeCandFwd2(mid, fromArg, argStored, stored, config) and + additionalLocalFlowStepNodeCand1(mid, node, config) and + stored = false + ) + or + exists(Node mid | + nodeCandFwd2(mid, _, _, stored, config) and + jumpStep(mid, node, config) and + fromArg = false and + argStored = TBooleanNone() + ) + or + exists(Node mid | + nodeCandFwd2(mid, _, _, stored, config) and + additionalJumpStep(mid, node, config) and + fromArg = false and + argStored = TBooleanNone() and + stored = false + ) + or + // store + exists(Node mid | + nodeCandFwd2(mid, fromArg, argStored, _, config) and + storeCand1(mid, _, node, config) and + stored = true + ) + or + // read + exists(Content c | + nodeCandFwd2Read(c, node, fromArg, argStored, config) and + nodeCandFwd2IsStored(c, stored, config) + ) + or + // flow into a callable + nodeCandFwd2In(_, node, _, _, stored, config) and + fromArg = true and + if parameterThroughFlowNodeCand1(node, config) + then argStored = TBooleanSome(stored) + else argStored = TBooleanNone() + or + // flow out of a callable + exists(DataFlowCall call | + nodeCandFwd2Out(call, node, fromArg, argStored, stored, config) and + fromArg = false + or + exists(boolean argStored0 | + nodeCandFwd2OutFromArg(call, node, argStored0, stored, config) and + nodeCandFwd2IsEntered(call, fromArg, argStored, argStored0, config) + ) + ) + ) +} + +/** + * Holds if `c` is the target of a store in the flow covered by `nodeCandFwd2`. + */ +pragma[noinline] +private predicate nodeCandFwd2IsStored(Content c, boolean stored, Configuration config) { + exists(Node mid, Node node | + useFieldFlow(config) and + nodeCand1(node, unbind(config)) and + nodeCandFwd2(mid, _, _, stored, config) and + storeCand1(mid, c, node, config) + ) +} + +pragma[nomagic] +private predicate nodeCandFwd2Read( + Content c, Node node, boolean fromArg, BooleanOption argStored, Configuration config +) { + exists(Node mid | + nodeCandFwd2(mid, fromArg, argStored, true, config) and + read(mid, c, node, config) + ) +} + +pragma[nomagic] +private predicate nodeCandFwd2In( + DataFlowCall call, ParameterNode p, boolean fromArg, BooleanOption argStored, boolean stored, + Configuration config +) { + exists(ArgumentNode arg, boolean allowsFieldFlow | + nodeCandFwd2(arg, fromArg, argStored, stored, config) and + flowIntoCallNodeCand1(call, arg, p, allowsFieldFlow, config) + | + stored = false or allowsFieldFlow = true + ) +} + +pragma[nomagic] +private predicate nodeCandFwd2Out( + DataFlowCall call, Node out, boolean fromArg, BooleanOption argStored, boolean stored, + Configuration config +) { + exists(ReturnNodeExt ret, boolean allowsFieldFlow | + nodeCandFwd2(ret, fromArg, argStored, stored, config) and + flowOutOfCallNodeCand1(call, ret, out, allowsFieldFlow, config) + | + stored = false or allowsFieldFlow = true + ) +} + +pragma[nomagic] +private predicate nodeCandFwd2OutFromArg( + DataFlowCall call, Node out, boolean argStored, boolean stored, Configuration config +) { + nodeCandFwd2Out(call, out, true, TBooleanSome(argStored), stored, config) +} + +/** + * Holds if an argument to `call` is reached in the flow covered by `nodeCandFwd2`. + */ +pragma[nomagic] +private predicate nodeCandFwd2IsEntered( + DataFlowCall call, boolean fromArg, BooleanOption argStored, boolean stored, Configuration config +) { + exists(ParameterNode p | + nodeCandFwd2In(call, p, fromArg, argStored, stored, config) and + parameterThroughFlowNodeCand1(p, config) + ) +} + +/** + * Holds if `node` is part of a path from a source to a sink in the + * configuration `config`. The Boolean `read` records whether the tracked + * value must be read from a field of `node` in order to reach a sink. + * + * The Boolean `toReturn` records whether the node must be returned from + * the enclosing callable in order to reach a sink, and if so, `returnRead` + * records whether a field must be read from the returned value. + */ +private predicate nodeCand2( + Node node, boolean toReturn, BooleanOption returnRead, boolean read, Configuration config +) { + nodeCandFwd2(node, _, _, false, config) and + config.isSink(node) and + toReturn = false and + returnRead = TBooleanNone() and + read = false + or + nodeCandFwd2(node, _, _, unbindBool(read), unbind(config)) and + ( + exists(Node mid | + localFlowStepNodeCand1(node, mid, config) and + nodeCand2(mid, toReturn, returnRead, read, config) + ) + or + exists(Node mid | + additionalLocalFlowStepNodeCand1(node, mid, config) and + nodeCand2(mid, toReturn, returnRead, read, config) and + read = false + ) + or + exists(Node mid | + jumpStep(node, mid, config) and + nodeCand2(mid, _, _, read, config) and + toReturn = false and + returnRead = TBooleanNone() + ) + or + exists(Node mid | + additionalJumpStep(node, mid, config) and + nodeCand2(mid, _, _, read, config) and + toReturn = false and + returnRead = TBooleanNone() and + read = false + ) + or + // store + exists(Content c | + nodeCand2Store(c, node, toReturn, returnRead, read, config) and + nodeCand2IsRead(c, read, config) + ) + or + // read + exists(Node mid, Content c, boolean read0 | + read(node, c, mid, config) and + nodeCandFwd2IsStored(c, unbindBool(read0), unbind(config)) and + nodeCand2(mid, toReturn, returnRead, read0, config) and + read = true + ) + or + // flow into a callable + exists(DataFlowCall call | + nodeCand2In(call, node, toReturn, returnRead, read, config) and + toReturn = false + or + exists(boolean returnRead0 | + nodeCand2InToReturn(call, node, returnRead0, read, config) and + nodeCand2IsReturned(call, toReturn, returnRead, returnRead0, config) + ) + ) + or + // flow out of a callable + nodeCand2Out(_, node, _, _, read, config) and + toReturn = true and + if nodeCandFwd2(node, true, TBooleanSome(_), unbindBool(read), config) + then returnRead = TBooleanSome(read) + else returnRead = TBooleanNone() + ) +} + +/** + * Holds if `c` is the target of a read in the flow covered by `nodeCand2`. + */ +pragma[noinline] +private predicate nodeCand2IsRead(Content c, boolean read, Configuration config) { + exists(Node mid, Node node | + useFieldFlow(config) and + nodeCandFwd2(node, _, _, true, unbind(config)) and + read(node, c, mid, config) and + nodeCandFwd2IsStored(c, unbindBool(read), unbind(config)) and + nodeCand2(mid, _, _, read, config) + ) +} + +pragma[nomagic] +private predicate nodeCand2Store( + Content c, Node node, boolean toReturn, BooleanOption returnRead, boolean stored, + Configuration config +) { + exists(Node mid | + storeCand1(node, c, mid, config) and + nodeCand2(mid, toReturn, returnRead, true, config) and + nodeCandFwd2(node, _, _, stored, unbind(config)) + ) +} + +/** + * Holds if `c` is the target of a store in the flow covered by `nodeCand2`. + */ +pragma[nomagic] +private predicate nodeCand2IsStored(Content c, boolean stored, Configuration conf) { + exists(Node node | + nodeCand2Store(c, node, _, _, stored, conf) and + nodeCand2(node, _, _, stored, conf) + ) +} + +/** + * Holds if `c` is the target of both a store and a read in the path graph + * covered by `nodeCand2`. + */ +pragma[noinline] +private predicate nodeCand2IsReadAndStored(Content c, Configuration conf) { + exists(boolean apNonEmpty | + nodeCand2IsStored(c, apNonEmpty, conf) and + nodeCand2IsRead(c, apNonEmpty, conf) + ) +} + +pragma[nomagic] +private predicate nodeCand2Out( + DataFlowCall call, ReturnNodeExt ret, boolean toReturn, BooleanOption returnRead, boolean read, + Configuration config +) { + exists(Node out, boolean allowsFieldFlow | + nodeCand2(out, toReturn, returnRead, read, config) and + flowOutOfCallNodeCand1(call, ret, out, allowsFieldFlow, config) + | + read = false or allowsFieldFlow = true + ) +} + +pragma[nomagic] +private predicate nodeCand2In( + DataFlowCall call, ArgumentNode arg, boolean toReturn, BooleanOption returnRead, boolean read, + Configuration config +) { + exists(ParameterNode p, boolean allowsFieldFlow | + nodeCand2(p, toReturn, returnRead, read, config) and + flowIntoCallNodeCand1(call, arg, p, allowsFieldFlow, config) + | + read = false or allowsFieldFlow = true + ) +} + +pragma[nomagic] +private predicate nodeCand2InToReturn( + DataFlowCall call, ArgumentNode arg, boolean returnRead, boolean read, Configuration config +) { + nodeCand2In(call, arg, true, TBooleanSome(returnRead), read, config) +} + +/** + * Holds if an output from `call` is reached in the flow covered by `nodeCand2`. + */ +pragma[nomagic] +private predicate nodeCand2IsReturned( + DataFlowCall call, boolean toReturn, BooleanOption returnRead, boolean read, Configuration config +) { + exists(ReturnNodeExt ret | + nodeCand2Out(call, ret, toReturn, returnRead, read, config) and + nodeCandFwd2(ret, true, TBooleanSome(_), read, config) + ) +} + +private predicate nodeCand2(Node node, Configuration config) { nodeCand2(node, _, _, _, config) } + +pragma[nomagic] +private predicate flowOutOfCallNodeCand2( + DataFlowCall call, ReturnNodeExt node1, Node node2, boolean allowsFieldFlow, Configuration config +) { + flowOutOfCallNodeCand1(call, node1, node2, allowsFieldFlow, config) and + nodeCand2(node2, config) and + nodeCand2(node1, unbind(config)) +} + +pragma[nomagic] +private predicate flowIntoCallNodeCand2( + DataFlowCall call, ArgumentNode node1, ParameterNode node2, boolean allowsFieldFlow, + Configuration config +) { + flowIntoCallNodeCand1(call, node1, node2, allowsFieldFlow, config) and + nodeCand2(node2, config) and + nodeCand2(node1, unbind(config)) +} + +private module LocalFlowBigStep { + /** + * A node where some checking is required, and hence the big-step relation + * is not allowed to step over. + */ + private class FlowCheckNode extends Node { + FlowCheckNode() { + this instanceof CastNode or + clearsContent(this, _) + } + } + + /** + * Holds if `node` can be the first node in a maximal subsequence of local + * flow steps in a dataflow path. + */ + private predicate localFlowEntry(Node node, Configuration config) { + nodeCand2(node, config) and + ( + config.isSource(node) or + jumpStep(_, node, config) or + additionalJumpStep(_, node, config) or + node instanceof ParameterNode or + node instanceof OutNodeExt or + store(_, _, node, _) or + read(_, _, node) or + node instanceof FlowCheckNode + ) + } + + /** + * Holds if `node` can be the last node in a maximal subsequence of local + * flow steps in a dataflow path. + */ + private predicate localFlowExit(Node node, Configuration config) { + exists(Node next | nodeCand2(next, config) | + jumpStep(node, next, config) or + additionalJumpStep(node, next, config) or + flowIntoCallNodeCand1(_, node, next, config) or + flowOutOfCallNodeCand1(_, node, next, config) or + store(node, _, next, _) or + read(node, _, next) + ) + or + node instanceof FlowCheckNode + or + config.isSink(node) + } + + pragma[noinline] + private predicate additionalLocalFlowStepNodeCand2(Node node1, Node node2, Configuration config) { + additionalLocalFlowStepNodeCand1(node1, node2, config) and + nodeCand2(node1, _, _, false, config) and + nodeCand2(node2, _, _, false, unbind(config)) + } + + /** + * Holds if the local path from `node1` to `node2` is a prefix of a maximal + * subsequence of local flow steps in a dataflow path. + * + * This is the transitive closure of `[additional]localFlowStep` beginning + * at `localFlowEntry`. + */ + pragma[nomagic] + private predicate localFlowStepPlus( + Node node1, Node node2, boolean preservesValue, DataFlowType t, Configuration config, + LocalCallContext cc + ) { + not isUnreachableInCall(node2, cc.(LocalCallContextSpecificCall).getCall()) and + ( + localFlowEntry(node1, config) and + ( + localFlowStepNodeCand1(node1, node2, config) and + preservesValue = true and + t = getNodeType(node1) + or + additionalLocalFlowStepNodeCand2(node1, node2, config) and + preservesValue = false and + t = getNodeType(node2) + ) and + node1 != node2 and + cc.relevantFor(node1.getEnclosingCallable()) and + not isUnreachableInCall(node1, cc.(LocalCallContextSpecificCall).getCall()) and + nodeCand2(node2, unbind(config)) + or + exists(Node mid | + localFlowStepPlus(node1, mid, preservesValue, t, config, cc) and + localFlowStepNodeCand1(mid, node2, config) and + not mid instanceof FlowCheckNode and + nodeCand2(node2, unbind(config)) + ) + or + exists(Node mid | + localFlowStepPlus(node1, mid, _, _, config, cc) and + additionalLocalFlowStepNodeCand2(mid, node2, config) and + not mid instanceof FlowCheckNode and + preservesValue = false and + t = getNodeType(node2) and + nodeCand2(node2, unbind(config)) + ) + ) + } + + /** + * Holds if `node1` can step to `node2` in one or more local steps and this + * path can occur as a maximal subsequence of local steps in a dataflow path. + */ + pragma[nomagic] + predicate localFlowBigStep( + Node node1, Node node2, boolean preservesValue, AccessPathFrontNil apf, Configuration config, + LocalCallContext callContext + ) { + localFlowStepPlus(node1, node2, preservesValue, apf.getType(), config, callContext) and + localFlowExit(node2, config) + } +} + +private import LocalFlowBigStep + +pragma[nomagic] +private predicate readCand2(Node node1, Content c, Node node2, Configuration config) { + read(node1, c, node2, config) and + nodeCand2(node1, _, _, true, unbind(config)) and + nodeCand2(node2, config) and + nodeCand2IsReadAndStored(c, unbind(config)) +} + +pragma[nomagic] +private predicate storeCand2( + Node node1, TypedContent tc, Node node2, DataFlowType contentType, Configuration config +) { + store(node1, tc, node2, contentType) and + nodeCand2(node1, config) and + nodeCand2(node2, _, _, true, unbind(config)) and + nodeCand2IsReadAndStored(tc.getContent(), unbind(config)) +} + +/** + * Holds if `node` is reachable with access path front `apf` from a + * source in the configuration `config`. + * + * The Boolean `fromArg` records whether the node is reached through an + * argument in a call, and if so, `argApf` records the front of the + * access path of that argument. + */ +pragma[nomagic] +private predicate flowCandFwd( + Node node, boolean fromArg, AccessPathFrontOption argApf, AccessPathFront apf, + Configuration config +) { + flowCandFwd0(node, fromArg, argApf, apf, config) and + not apf.isClearedAt(node) and + if node instanceof CastingNode then compatibleTypes(getNodeType(node), apf.getType()) else any() +} + +pragma[nomagic] +private predicate flowCandFwd0( + Node node, boolean fromArg, AccessPathFrontOption argApf, AccessPathFront apf, + Configuration config +) { + nodeCand2(node, _, _, false, config) and + config.isSource(node) and + fromArg = false and + argApf = TAccessPathFrontNone() and + apf = TFrontNil(getNodeType(node)) + or + exists(Node mid | + flowCandFwd(mid, fromArg, argApf, apf, config) and + localFlowBigStep(mid, node, true, _, config, _) + ) + or + exists(Node mid, AccessPathFrontNil nil | + flowCandFwd(mid, fromArg, argApf, nil, config) and + localFlowBigStep(mid, node, false, apf, config, _) + ) + or + exists(Node mid | + flowCandFwd(mid, _, _, apf, config) and + nodeCand2(node, unbind(config)) and + jumpStep(mid, node, config) and + fromArg = false and + argApf = TAccessPathFrontNone() + ) + or + exists(Node mid, AccessPathFrontNil nil | + flowCandFwd(mid, _, _, nil, config) and + nodeCand2(node, unbind(config)) and + additionalJumpStep(mid, node, config) and + fromArg = false and + argApf = TAccessPathFrontNone() and + apf = TFrontNil(getNodeType(node)) + ) + or + // store + exists(Node mid, TypedContent tc, AccessPathFront apf0, DataFlowType contentType | + flowCandFwd(mid, fromArg, argApf, apf0, config) and + storeCand2(mid, tc, node, contentType, config) and + nodeCand2(node, _, _, true, unbind(config)) and + apf.headUsesContent(tc) and + compatibleTypes(apf0.getType(), contentType) + ) + or + // read + exists(TypedContent tc | + flowCandFwdRead(tc, node, fromArg, argApf, config) and + flowCandFwdConsCand(tc, apf, config) and + nodeCand2(node, _, _, unbindBool(apf.toBoolNonEmpty()), unbind(config)) + ) + or + // flow into a callable + flowCandFwdIn(_, node, _, _, apf, config) and + fromArg = true and + if nodeCand2(node, true, _, unbindBool(apf.toBoolNonEmpty()), config) + then argApf = TAccessPathFrontSome(apf) + else argApf = TAccessPathFrontNone() + or + // flow out of a callable + exists(DataFlowCall call | + flowCandFwdOut(call, node, fromArg, argApf, apf, config) and + fromArg = false + or + exists(AccessPathFront argApf0 | + flowCandFwdOutFromArg(call, node, argApf0, apf, config) and + flowCandFwdIsEntered(call, fromArg, argApf, argApf0, config) + ) + ) +} + +pragma[nomagic] +private predicate flowCandFwdConsCand(TypedContent tc, AccessPathFront apf, Configuration config) { + exists(Node mid, Node n, DataFlowType contentType | + flowCandFwd(mid, _, _, apf, config) and + storeCand2(mid, tc, n, contentType, config) and + nodeCand2(n, _, _, true, unbind(config)) and + compatibleTypes(apf.getType(), contentType) + ) +} + +pragma[nomagic] +private predicate flowCandFwdRead0( + Node node1, TypedContent tc, Content c, Node node2, boolean fromArg, AccessPathFrontOption argApf, + AccessPathFrontHead apf, Configuration config +) { + flowCandFwd(node1, fromArg, argApf, apf, config) and + readCand2(node1, c, node2, config) and + apf.headUsesContent(tc) +} + +pragma[nomagic] +private predicate flowCandFwdRead( + TypedContent tc, Node node, boolean fromArg, AccessPathFrontOption argApf, Configuration config +) { + flowCandFwdRead0(_, tc, tc.getContent(), node, fromArg, argApf, _, config) +} + +pragma[nomagic] +private predicate flowCandFwdIn( + DataFlowCall call, ParameterNode p, boolean fromArg, AccessPathFrontOption argApf, + AccessPathFront apf, Configuration config +) { + exists(ArgumentNode arg, boolean allowsFieldFlow | + flowCandFwd(arg, fromArg, argApf, apf, config) and + flowIntoCallNodeCand2(call, arg, p, allowsFieldFlow, config) + | + apf instanceof AccessPathFrontNil or allowsFieldFlow = true + ) +} + +pragma[nomagic] +private predicate flowCandFwdOut( + DataFlowCall call, Node node, boolean fromArg, AccessPathFrontOption argApf, AccessPathFront apf, + Configuration config +) { + exists(ReturnNodeExt ret, boolean allowsFieldFlow | + flowCandFwd(ret, fromArg, argApf, apf, config) and + flowOutOfCallNodeCand2(call, ret, node, allowsFieldFlow, config) + | + apf instanceof AccessPathFrontNil or allowsFieldFlow = true + ) +} + +pragma[nomagic] +private predicate flowCandFwdOutFromArg( + DataFlowCall call, Node node, AccessPathFront argApf, AccessPathFront apf, Configuration config +) { + flowCandFwdOut(call, node, true, TAccessPathFrontSome(argApf), apf, config) +} + +/** + * Holds if an argument to `call` is reached in the flow covered by `flowCandFwd`. + */ +pragma[nomagic] +private predicate flowCandFwdIsEntered( + DataFlowCall call, boolean fromArg, AccessPathFrontOption argApf, AccessPathFront apf, + Configuration config +) { + exists(ParameterNode p | + flowCandFwdIn(call, p, fromArg, argApf, apf, config) and + nodeCand2(p, true, TBooleanSome(_), unbindBool(apf.toBoolNonEmpty()), config) + ) +} + +/** + * Holds if `node` with access path front `apf` is part of a path from a + * source to a sink in the configuration `config`. + * + * The Boolean `toReturn` records whether the node must be returned from + * the enclosing callable in order to reach a sink, and if so, `returnApf` + * records the front of the access path of the returned value. + */ +pragma[nomagic] +private predicate flowCand( + Node node, boolean toReturn, AccessPathFrontOption returnApf, AccessPathFront apf, + Configuration config +) { + flowCand0(node, toReturn, returnApf, apf, config) and + flowCandFwd(node, _, _, apf, config) +} + +pragma[nomagic] +private predicate flowCand0( + Node node, boolean toReturn, AccessPathFrontOption returnApf, AccessPathFront apf, + Configuration config +) { + flowCandFwd(node, _, _, apf, config) and + config.isSink(node) and + toReturn = false and + returnApf = TAccessPathFrontNone() and + apf instanceof AccessPathFrontNil + or + exists(Node mid | + localFlowBigStep(node, mid, true, _, config, _) and + flowCand(mid, toReturn, returnApf, apf, config) + ) + or + exists(Node mid, AccessPathFrontNil nil | + flowCandFwd(node, _, _, apf, config) and + localFlowBigStep(node, mid, false, _, config, _) and + flowCand(mid, toReturn, returnApf, nil, config) and + apf instanceof AccessPathFrontNil + ) + or + exists(Node mid | + jumpStep(node, mid, config) and + flowCand(mid, _, _, apf, config) and + toReturn = false and + returnApf = TAccessPathFrontNone() + ) + or + exists(Node mid, AccessPathFrontNil nil | + flowCandFwd(node, _, _, apf, config) and + additionalJumpStep(node, mid, config) and + flowCand(mid, _, _, nil, config) and + toReturn = false and + returnApf = TAccessPathFrontNone() and + apf instanceof AccessPathFrontNil + ) + or + // store + exists(TypedContent tc | + flowCandStore(node, tc, apf, toReturn, returnApf, config) and + flowCandConsCand(tc, apf, config) + ) + or + // read + exists(TypedContent tc, AccessPathFront apf0 | + flowCandRead(node, tc, apf, toReturn, returnApf, apf0, config) and + flowCandFwdConsCand(tc, apf0, config) + ) + or + // flow into a callable + exists(DataFlowCall call | + flowCandIn(call, node, toReturn, returnApf, apf, config) and + toReturn = false + or + exists(AccessPathFront returnApf0 | + flowCandInToReturn(call, node, returnApf0, apf, config) and + flowCandIsReturned(call, toReturn, returnApf, returnApf0, config) + ) + ) + or + // flow out of a callable + flowCandOut(_, node, _, _, apf, config) and + toReturn = true and + if flowCandFwd(node, true, _, apf, config) + then returnApf = TAccessPathFrontSome(apf) + else returnApf = TAccessPathFrontNone() +} + +pragma[nomagic] +private predicate readCandFwd( + Node node1, TypedContent tc, AccessPathFront apf, Node node2, Configuration config +) { + flowCandFwdRead0(node1, tc, tc.getContent(), node2, _, _, apf, config) +} + +pragma[nomagic] +private predicate flowCandRead( + Node node, TypedContent tc, AccessPathFront apf, boolean toReturn, + AccessPathFrontOption returnApf, AccessPathFront apf0, Configuration config +) { + exists(Node mid | + readCandFwd(node, tc, apf, mid, config) and + flowCand(mid, toReturn, returnApf, apf0, config) + ) +} + +pragma[nomagic] +private predicate flowCandStore( + Node node, TypedContent tc, AccessPathFront apf, boolean toReturn, + AccessPathFrontOption returnApf, Configuration config +) { + exists(Node mid | + flowCandFwd(node, _, _, apf, config) and + storeCand2(node, tc, mid, _, unbind(config)) and + flowCand(mid, toReturn, returnApf, TFrontHead(tc), unbind(config)) + ) +} + +pragma[nomagic] +private predicate flowCandConsCand(TypedContent tc, AccessPathFront apf, Configuration config) { + flowCandFwdConsCand(tc, apf, config) and + flowCandRead(_, tc, _, _, _, apf, config) +} + +pragma[nomagic] +private predicate flowCandOut( + DataFlowCall call, ReturnNodeExt ret, boolean toReturn, AccessPathFrontOption returnApf, + AccessPathFront apf, Configuration config +) { + exists(Node out, boolean allowsFieldFlow | + flowCand(out, toReturn, returnApf, apf, config) and + flowOutOfCallNodeCand2(call, ret, out, allowsFieldFlow, config) + | + apf instanceof AccessPathFrontNil or allowsFieldFlow = true + ) +} + +pragma[nomagic] +private predicate flowCandIn( + DataFlowCall call, ArgumentNode arg, boolean toReturn, AccessPathFrontOption returnApf, + AccessPathFront apf, Configuration config +) { + exists(ParameterNode p, boolean allowsFieldFlow | + flowCand(p, toReturn, returnApf, apf, config) and + flowIntoCallNodeCand2(call, arg, p, allowsFieldFlow, config) + | + apf instanceof AccessPathFrontNil or allowsFieldFlow = true + ) +} + +pragma[nomagic] +private predicate flowCandInToReturn( + DataFlowCall call, ArgumentNode arg, AccessPathFront returnApf, AccessPathFront apf, + Configuration config +) { + flowCandIn(call, arg, true, TAccessPathFrontSome(returnApf), apf, config) +} + +/** + * Holds if an output from `call` is reached in the flow covered by `flowCand`. + */ +pragma[nomagic] +private predicate flowCandIsReturned( + DataFlowCall call, boolean toReturn, AccessPathFrontOption returnApf, AccessPathFront apf, + Configuration config +) { + exists(ReturnNodeExt ret | + flowCandOut(call, ret, toReturn, returnApf, apf, config) and + flowCandFwd(ret, true, TAccessPathFrontSome(_), apf, config) + ) +} + +private newtype TAccessPath = + TNil(DataFlowType t) or + TConsNil(TypedContent tc, DataFlowType t) { flowCandConsCand(tc, TFrontNil(t), _) } or + TConsCons(TypedContent tc1, TypedContent tc2, int len) { + flowCandConsCand(tc1, TFrontHead(tc2), _) and len in [2 .. accessPathLimit()] + } + +/** + * Conceptually a list of `TypedContent`s followed by a `Type`, but only the first two + * elements of the list and its length are tracked. If data flows from a source to + * a given node with a given `AccessPath`, this indicates the sequence of + * dereference operations needed to get from the value in the node to the + * tracked object. The final type indicates the type of the tracked object. + */ +abstract private class AccessPath extends TAccessPath { + abstract string toString(); + + abstract TypedContent getHead(); + + abstract int len(); + + abstract DataFlowType getType(); + + abstract AccessPathFront getFront(); + + /** + * Holds if this access path has `head` at the front and may be followed by `tail`. + */ + abstract predicate pop(TypedContent head, AccessPath tail); +} + +private class AccessPathNil extends AccessPath, TNil { + private DataFlowType t; + + AccessPathNil() { this = TNil(t) } + + override string toString() { result = concat(": " + ppReprType(t)) } + + override TypedContent getHead() { none() } + + override int len() { result = 0 } + + override DataFlowType getType() { result = t } + + override AccessPathFront getFront() { result = TFrontNil(t) } + + override predicate pop(TypedContent head, AccessPath tail) { none() } +} + +abstract private class AccessPathCons extends AccessPath { } + +private class AccessPathConsNil extends AccessPathCons, TConsNil { + private TypedContent tc; + private DataFlowType t; + + AccessPathConsNil() { this = TConsNil(tc, t) } + + override string toString() { + // The `concat` becomes "" if `ppReprType` has no result. + result = "[" + tc.toString() + "]" + concat(" : " + ppReprType(t)) + } + + override TypedContent getHead() { result = tc } + + override int len() { result = 1 } + + override DataFlowType getType() { result = tc.getContainerType() } + + override AccessPathFront getFront() { result = TFrontHead(tc) } + + override predicate pop(TypedContent head, AccessPath tail) { head = tc and tail = TNil(t) } +} + +private class AccessPathConsCons extends AccessPathCons, TConsCons { + private TypedContent tc1; + private TypedContent tc2; + private int len; + + AccessPathConsCons() { this = TConsCons(tc1, tc2, len) } + + override string toString() { + if len = 2 + then result = "[" + tc1.toString() + ", " + tc2.toString() + "]" + else result = "[" + tc1.toString() + ", " + tc2.toString() + ", ... (" + len.toString() + ")]" + } + + override TypedContent getHead() { result = tc1 } + + override int len() { result = len } + + override DataFlowType getType() { result = tc1.getContainerType() } + + override AccessPathFront getFront() { result = TFrontHead(tc1) } + + override predicate pop(TypedContent head, AccessPath tail) { + head = tc1 and + ( + tail = TConsCons(tc2, _, len - 1) + or + len = 2 and + tail = TConsNil(tc2, _) + ) + } +} + +/** Gets the access path obtained by popping `tc` from `ap`, if any. */ +private AccessPath pop(TypedContent tc, AccessPath ap) { ap.pop(tc, result) } + +/** Gets the access path obtained by pushing `tc` onto `ap`. */ +private AccessPath push(TypedContent tc, AccessPath ap) { ap = pop(tc, result) } + +private newtype TAccessPathOption = + TAccessPathNone() or + TAccessPathSome(AccessPath ap) + +private class AccessPathOption extends TAccessPathOption { + string toString() { + this = TAccessPathNone() and result = "" + or + this = TAccessPathSome(any(AccessPath ap | result = ap.toString())) + } +} + +/** + * Holds if `node` is reachable with access path `ap` from a source in + * the configuration `config`. + * + * The Boolean `fromArg` records whether the node is reached through an + * argument in a call, and if so, `argAp` records the access path of that + * argument. + */ +private predicate flowFwd( + Node node, boolean fromArg, AccessPathOption argAp, AccessPathFront apf, AccessPath ap, + Configuration config +) { + flowFwd0(node, fromArg, argAp, apf, ap, config) and + flowCand(node, _, _, apf, config) +} + +private predicate flowFwd0( + Node node, boolean fromArg, AccessPathOption argAp, AccessPathFront apf, AccessPath ap, + Configuration config +) { + flowCand(node, _, _, _, config) and + config.isSource(node) and + fromArg = false and + argAp = TAccessPathNone() and + ap = TNil(getNodeType(node)) and + apf = ap.(AccessPathNil).getFront() + or + flowCand(node, _, _, _, unbind(config)) and + ( + exists(Node mid | + flowFwd(mid, fromArg, argAp, apf, ap, config) and + localFlowBigStep(mid, node, true, _, config, _) + ) + or + exists(Node mid, AccessPathNil nil | + flowFwd(mid, fromArg, argAp, _, nil, config) and + localFlowBigStep(mid, node, false, apf, config, _) and + apf = ap.(AccessPathNil).getFront() + ) + or + exists(Node mid | + flowFwd(mid, _, _, apf, ap, config) and + jumpStep(mid, node, config) and + fromArg = false and + argAp = TAccessPathNone() + ) + or + exists(Node mid, AccessPathNil nil | + flowFwd(mid, _, _, _, nil, config) and + additionalJumpStep(mid, node, config) and + fromArg = false and + argAp = TAccessPathNone() and + ap = TNil(getNodeType(node)) and + apf = ap.(AccessPathNil).getFront() + ) + ) + or + // store + exists(TypedContent tc | flowFwdStore(node, tc, pop(tc, ap), apf, fromArg, argAp, config)) + or + // read + exists(TypedContent tc | + flowFwdRead(node, _, push(tc, ap), apf, fromArg, argAp, config) and + flowFwdConsCand(tc, apf, ap, config) + ) + or + // flow into a callable + flowFwdIn(_, node, _, _, apf, ap, config) and + fromArg = true and + if flowCand(node, true, _, apf, config) + then argAp = TAccessPathSome(ap) + else argAp = TAccessPathNone() + or + // flow out of a callable + exists(DataFlowCall call | + flowFwdOut(call, node, fromArg, argAp, apf, ap, config) and + fromArg = false + or + exists(AccessPath argAp0 | + flowFwdOutFromArg(call, node, argAp0, apf, ap, config) and + flowFwdIsEntered(call, fromArg, argAp, argAp0, config) + ) + ) +} + +pragma[nomagic] +private predicate flowFwdStore( + Node node, TypedContent tc, AccessPath ap0, AccessPathFront apf, boolean fromArg, + AccessPathOption argAp, Configuration config +) { + exists(Node mid, AccessPathFront apf0 | + flowFwd(mid, fromArg, argAp, apf0, ap0, config) and + flowFwdStore0(mid, tc, node, apf0, apf, config) + ) +} + +pragma[nomagic] +private predicate storeCand( + Node mid, TypedContent tc, Node node, AccessPathFront apf0, AccessPathFront apf, + Configuration config +) { + storeCand2(mid, tc, node, _, config) and + flowCand(mid, _, _, apf0, config) and + apf.headUsesContent(tc) +} + +pragma[noinline] +private predicate flowFwdStore0( + Node mid, TypedContent tc, Node node, AccessPathFront apf0, AccessPathFrontHead apf, + Configuration config +) { + storeCand(mid, tc, node, apf0, apf, config) and + flowCandConsCand(tc, apf0, config) and + flowCand(node, _, _, apf, unbind(config)) +} + +pragma[nomagic] +private predicate flowFwdRead0( + Node node1, TypedContent tc, AccessPathFrontHead apf0, AccessPath ap0, Node node2, + boolean fromArg, AccessPathOption argAp, Configuration config +) { + flowFwd(node1, fromArg, argAp, apf0, ap0, config) and + readCandFwd(node1, tc, apf0, node2, config) +} + +pragma[nomagic] +private predicate flowFwdRead( + Node node, AccessPathFrontHead apf0, AccessPath ap0, AccessPathFront apf, boolean fromArg, + AccessPathOption argAp, Configuration config +) { + exists(Node mid, TypedContent tc | + flowFwdRead0(mid, tc, apf0, ap0, node, fromArg, argAp, config) and + flowCand(node, _, _, apf, unbind(config)) and + flowCandConsCand(tc, apf, unbind(config)) + ) +} + +pragma[nomagic] +private predicate flowFwdConsCand( + TypedContent tc, AccessPathFront apf, AccessPath ap, Configuration config +) { + exists(Node n | + flowFwd(n, _, _, apf, ap, config) and + flowFwdStore0(n, tc, _, apf, _, config) + ) +} + +pragma[nomagic] +private predicate flowFwdIn( + DataFlowCall call, ParameterNode p, boolean fromArg, AccessPathOption argAp, AccessPathFront apf, + AccessPath ap, Configuration config +) { + exists(ArgumentNode arg, boolean allowsFieldFlow | + flowFwd(arg, fromArg, argAp, apf, ap, config) and + flowIntoCallNodeCand2(call, arg, p, allowsFieldFlow, config) and + flowCand(p, _, _, _, unbind(config)) + | + ap instanceof AccessPathNil or allowsFieldFlow = true + ) +} + +pragma[nomagic] +private predicate flowFwdOut( + DataFlowCall call, Node node, boolean fromArg, AccessPathOption argAp, AccessPathFront apf, + AccessPath ap, Configuration config +) { + exists(ReturnNodeExt ret, boolean allowsFieldFlow | + flowFwd(ret, fromArg, argAp, apf, ap, config) and + flowOutOfCallNodeCand2(call, ret, node, allowsFieldFlow, config) and + flowCand(node, _, _, _, unbind(config)) + | + ap instanceof AccessPathNil or allowsFieldFlow = true + ) +} + +pragma[nomagic] +private predicate flowFwdOutFromArg( + DataFlowCall call, Node node, AccessPath argAp, AccessPathFront apf, AccessPath ap, + Configuration config +) { + flowFwdOut(call, node, true, TAccessPathSome(argAp), apf, ap, config) +} + +/** + * Holds if an argument to `call` is reached in the flow covered by `flowFwd`. + */ +pragma[nomagic] +private predicate flowFwdIsEntered( + DataFlowCall call, boolean fromArg, AccessPathOption argAp, AccessPath ap, Configuration config +) { + exists(ParameterNode p, AccessPathFront apf | + flowFwdIn(call, p, fromArg, argAp, apf, ap, config) and + flowCand(p, true, TAccessPathFrontSome(_), apf, config) + ) +} + +/** + * Holds if `node` with access path `ap` is part of a path from a source to + * a sink in the configuration `config`. + * + * The Boolean `toReturn` records whether the node must be returned from + * the enclosing callable in order to reach a sink, and if so, `returnAp` + * records the access path of the returned value. + */ +private predicate flow( + Node node, boolean toReturn, AccessPathOption returnAp, AccessPath ap, Configuration config +) { + flow0(node, toReturn, returnAp, ap, config) and + flowFwd(node, _, _, _, ap, config) +} + +private predicate flow0( + Node node, boolean toReturn, AccessPathOption returnAp, AccessPath ap, Configuration config +) { + flowFwd(node, _, _, _, ap, config) and + config.isSink(node) and + toReturn = false and + returnAp = TAccessPathNone() and + ap instanceof AccessPathNil + or + exists(Node mid | + localFlowBigStep(node, mid, true, _, config, _) and + flow(mid, toReturn, returnAp, ap, config) + ) + or + exists(Node mid, AccessPathNil nil | + flowFwd(node, _, _, _, ap, config) and + localFlowBigStep(node, mid, false, _, config, _) and + flow(mid, toReturn, returnAp, nil, config) and + ap instanceof AccessPathNil + ) + or + exists(Node mid | + jumpStep(node, mid, config) and + flow(mid, _, _, ap, config) and + toReturn = false and + returnAp = TAccessPathNone() + ) + or + exists(Node mid, AccessPathNil nil | + flowFwd(node, _, _, _, ap, config) and + additionalJumpStep(node, mid, config) and + flow(mid, _, _, nil, config) and + toReturn = false and + returnAp = TAccessPathNone() and + ap instanceof AccessPathNil + ) + or + // store + exists(TypedContent tc | + flowStore(tc, node, toReturn, returnAp, ap, config) and + flowConsCand(tc, ap, config) + ) + or + // read + exists(Node mid, AccessPath ap0 | + readFlowFwd(node, _, mid, ap, ap0, config) and + flow(mid, toReturn, returnAp, ap0, config) + ) + or + // flow into a callable + exists(DataFlowCall call | + flowIn(call, node, toReturn, returnAp, ap, config) and + toReturn = false + or + exists(AccessPath returnAp0 | + flowInToReturn(call, node, returnAp0, ap, config) and + flowIsReturned(call, toReturn, returnAp, returnAp0, config) + ) + ) + or + // flow out of a callable + flowOut(_, node, _, _, ap, config) and + toReturn = true and + if flowFwd(node, true, TAccessPathSome(_), _, ap, config) + then returnAp = TAccessPathSome(ap) + else returnAp = TAccessPathNone() +} + +pragma[nomagic] +private predicate storeFlowFwd( + Node node1, TypedContent tc, Node node2, AccessPath ap, AccessPath ap0, Configuration config +) { + storeCand2(node1, tc, node2, _, config) and + flowFwdStore(node2, tc, ap, _, _, _, config) and + ap0 = push(tc, ap) +} + +pragma[nomagic] +private predicate flowStore( + TypedContent tc, Node node, boolean toReturn, AccessPathOption returnAp, AccessPath ap, + Configuration config +) { + exists(Node mid, AccessPath ap0 | + storeFlowFwd(node, tc, mid, ap, ap0, config) and + flow(mid, toReturn, returnAp, ap0, config) + ) +} + +pragma[nomagic] +private predicate readFlowFwd( + Node node1, TypedContent tc, Node node2, AccessPath ap, AccessPath ap0, Configuration config +) { + exists(AccessPathFrontHead apf | + readCandFwd(node1, tc, apf, node2, config) and + flowFwdRead(node2, apf, ap, _, _, _, config) and + ap0 = pop(tc, ap) and + flowFwdConsCand(tc, _, ap0, unbind(config)) + ) +} + +pragma[nomagic] +private predicate flowConsCand(TypedContent tc, AccessPath ap, Configuration config) { + exists(Node n, Node mid | + flow(mid, _, _, ap, config) and + readFlowFwd(n, tc, mid, _, ap, config) + ) +} + +pragma[nomagic] +private predicate flowOut( + DataFlowCall call, ReturnNodeExt ret, boolean toReturn, AccessPathOption returnAp, AccessPath ap, + Configuration config +) { + exists(Node out, boolean allowsFieldFlow | + flow(out, toReturn, returnAp, ap, config) and + flowOutOfCallNodeCand2(call, ret, out, allowsFieldFlow, config) + | + ap instanceof AccessPathNil or allowsFieldFlow = true + ) +} + +pragma[nomagic] +private predicate flowIn( + DataFlowCall call, ArgumentNode arg, boolean toReturn, AccessPathOption returnAp, AccessPath ap, + Configuration config +) { + exists(ParameterNode p, boolean allowsFieldFlow | + flow(p, toReturn, returnAp, ap, config) and + flowIntoCallNodeCand2(call, arg, p, allowsFieldFlow, config) + | + ap instanceof AccessPathNil or allowsFieldFlow = true + ) +} + +pragma[nomagic] +private predicate flowInToReturn( + DataFlowCall call, ArgumentNode arg, AccessPath returnAp, AccessPath ap, Configuration config +) { + flowIn(call, arg, true, TAccessPathSome(returnAp), ap, config) +} + +/** + * Holds if an output from `call` is reached in the flow covered by `flow`. + */ +pragma[nomagic] +private predicate flowIsReturned( + DataFlowCall call, boolean toReturn, AccessPathOption returnAp, AccessPath ap, + Configuration config +) { + exists(ReturnNodeExt ret | + flowOut(call, ret, toReturn, returnAp, ap, config) and + flowFwd(ret, true, TAccessPathSome(_), _, ap, config) + ) +} + +bindingset[conf, result] +private Configuration unbind(Configuration conf) { result >= conf and result <= conf } + +private predicate flow(Node n, Configuration config) { flow(n, _, _, _, config) } + +pragma[noinline] +private predicate parameterFlow( + ParameterNode p, AccessPath ap, DataFlowCallable c, Configuration config +) { + flow(p, true, _, ap, config) and + c = p.getEnclosingCallable() +} + +private newtype TSummaryCtx = + TSummaryCtxNone() or + TSummaryCtxSome(ParameterNode p, AccessPath ap) { + exists(ReturnNodeExt ret, Configuration config, AccessPath ap0 | + parameterFlow(p, ap, ret.getEnclosingCallable(), config) and + flow(ret, true, TAccessPathSome(_), ap0, config) and + flowFwd(ret, true, TAccessPathSome(ap), _, ap0, config) + ) + } + +/** + * A context for generating flow summaries. This represents flow entry through + * a specific parameter with an access path of a specific shape. + * + * Summaries are only created for parameters that may flow through. + */ +abstract private class SummaryCtx extends TSummaryCtx { + abstract string toString(); +} + +/** A summary context from which no flow summary can be generated. */ +private class SummaryCtxNone extends SummaryCtx, TSummaryCtxNone { + override string toString() { result = "" } +} + +/** A summary context from which a flow summary can be generated. */ +private class SummaryCtxSome extends SummaryCtx, TSummaryCtxSome { + private ParameterNode p; + private AccessPath ap; + + SummaryCtxSome() { this = TSummaryCtxSome(p, ap) } + + int getParameterPos() { p.isParameterOf(_, result) } + + override string toString() { result = p + ": " + ap } + + predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + p.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } +} + +private newtype TPathNode = + TPathNodeMid(Node node, CallContext cc, SummaryCtx sc, AccessPath ap, Configuration config) { + // A PathNode is introduced by a source ... + flow(node, config) and + config.isSource(node) and + cc instanceof CallContextAny and + sc instanceof SummaryCtxNone and + ap = TNil(getNodeType(node)) + or + // ... or a step from an existing PathNode to another node. + exists(PathNodeMid mid | + pathStep(mid, node, cc, sc, ap) and + config = mid.getConfiguration() and + flow(node, _, _, ap, unbind(config)) + ) + } or + TPathNodeSink(Node node, Configuration config) { + config.isSink(node) and + flow(node, unbind(config)) and + ( + // A sink that is also a source ... + config.isSource(node) + or + // ... or a sink that can be reached from a source + exists(PathNodeMid mid | + pathStep(mid, node, _, _, any(AccessPathNil nil)) and + config = unbind(mid.getConfiguration()) + ) + ) + } + +/** + * A `Node` augmented with a call context (except for sinks), an access path, and a configuration. + * Only those `PathNode`s that are reachable from a source are generated. + */ +class PathNode extends TPathNode { + /** Gets a textual representation of this element. */ + string toString() { none() } + + /** + * Gets a textual representation of this element, including a textual + * representation of the call context. + */ + string toStringWithContext() { none() } + + /** + * Holds if this element is at the specified location. + * The location spans column `startcolumn` of line `startline` to + * column `endcolumn` of line `endline` in file `filepath`. + * For more information, see + * [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html). + */ + predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + none() + } + + /** Gets the underlying `Node`. */ + Node getNode() { none() } + + /** Gets the associated configuration. */ + Configuration getConfiguration() { none() } + + private predicate isHidden() { + nodeIsHidden(this.getNode()) and + not this.isSource() and + not this instanceof PathNodeSink + } + + private PathNode getASuccessorIfHidden() { + this.isHidden() and + result = this.(PathNodeImpl).getASuccessorImpl() + } + + /** Gets a successor of this node, if any. */ + final PathNode getASuccessor() { + result = this.(PathNodeImpl).getASuccessorImpl().getASuccessorIfHidden*() and + not this.isHidden() and + not result.isHidden() + } + + /** Holds if this node is a source. */ + predicate isSource() { none() } +} + +abstract private class PathNodeImpl extends PathNode { + abstract PathNode getASuccessorImpl(); + + private string ppAp() { + this instanceof PathNodeSink and result = "" + or + exists(string s | s = this.(PathNodeMid).getAp().toString() | + if s = "" then result = "" else result = " " + s + ) + } + + private string ppCtx() { + this instanceof PathNodeSink and result = "" + or + result = " <" + this.(PathNodeMid).getCallContext().toString() + ">" + } + + override string toString() { result = this.getNode().toString() + ppAp() } + + override string toStringWithContext() { result = this.getNode().toString() + ppAp() + ppCtx() } + + override predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + this.getNode().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } +} + +/** Holds if `n` can reach a sink. */ +private predicate reach(PathNode n) { n instanceof PathNodeSink or reach(n.getASuccessor()) } + +/** Holds if `n1.getSucc() = n2` and `n2` can reach a sink. */ +private predicate pathSucc(PathNode n1, PathNode n2) { n1.getASuccessor() = n2 and reach(n2) } + +private predicate pathSuccPlus(PathNode n1, PathNode n2) = fastTC(pathSucc/2)(n1, n2) + +/** + * Provides the query predicates needed to include a graph in a path-problem query. + */ +module PathGraph { + /** Holds if `(a,b)` is an edge in the graph of data flow path explanations. */ + query predicate edges(PathNode a, PathNode b) { pathSucc(a, b) } + + /** Holds if `n` is a node in the graph of data flow path explanations. */ + query predicate nodes(PathNode n, string key, string val) { + reach(n) and key = "semmle.label" and val = n.toString() + } +} + +/** + * An intermediate flow graph node. This is a triple consisting of a `Node`, + * a `CallContext`, and a `Configuration`. + */ +private class PathNodeMid extends PathNodeImpl, TPathNodeMid { + Node node; + CallContext cc; + SummaryCtx sc; + AccessPath ap; + Configuration config; + + PathNodeMid() { this = TPathNodeMid(node, cc, sc, ap, config) } + + override Node getNode() { result = node } + + CallContext getCallContext() { result = cc } + + SummaryCtx getSummaryCtx() { result = sc } + + AccessPath getAp() { result = ap } + + override Configuration getConfiguration() { result = config } + + private PathNodeMid getSuccMid() { + pathStep(this, result.getNode(), result.getCallContext(), result.getSummaryCtx(), result.getAp()) and + result.getConfiguration() = unbind(this.getConfiguration()) + } + + override PathNodeImpl getASuccessorImpl() { + // an intermediate step to another intermediate node + result = getSuccMid() + or + // a final step to a sink via zero steps means we merge the last two steps to prevent trivial-looking edges + exists(PathNodeMid mid, PathNodeSink sink | + mid = getSuccMid() and + mid.getNode() = sink.getNode() and + mid.getAp() instanceof AccessPathNil and + sink.getConfiguration() = unbind(mid.getConfiguration()) and + result = sink + ) + } + + override predicate isSource() { + config.isSource(node) and + cc instanceof CallContextAny and + sc instanceof SummaryCtxNone and + ap instanceof AccessPathNil + } +} + +/** + * A flow graph node corresponding to a sink. This is disjoint from the + * intermediate nodes in order to uniquely correspond to a given sink by + * excluding the `CallContext`. + */ +private class PathNodeSink extends PathNodeImpl, TPathNodeSink { + Node node; + Configuration config; + + PathNodeSink() { this = TPathNodeSink(node, config) } + + override Node getNode() { result = node } + + override Configuration getConfiguration() { result = config } + + override PathNode getASuccessorImpl() { none() } + + override predicate isSource() { config.isSource(node) } +} + +/** + * Holds if data may flow from `mid` to `node`. The last step in or out of + * a callable is recorded by `cc`. + */ +private predicate pathStep(PathNodeMid mid, Node node, CallContext cc, SummaryCtx sc, AccessPath ap) { + exists(AccessPath ap0, Node midnode, Configuration conf, LocalCallContext localCC | + midnode = mid.getNode() and + conf = mid.getConfiguration() and + cc = mid.getCallContext() and + sc = mid.getSummaryCtx() and + localCC = getLocalCallContext(cc, midnode.getEnclosingCallable()) and + ap0 = mid.getAp() + | + localFlowBigStep(midnode, node, true, _, conf, localCC) and + ap = ap0 + or + localFlowBigStep(midnode, node, false, ap.getFront(), conf, localCC) and + ap0 instanceof AccessPathNil + ) + or + jumpStep(mid.getNode(), node, mid.getConfiguration()) and + cc instanceof CallContextAny and + sc instanceof SummaryCtxNone and + ap = mid.getAp() + or + additionalJumpStep(mid.getNode(), node, mid.getConfiguration()) and + cc instanceof CallContextAny and + sc instanceof SummaryCtxNone and + mid.getAp() instanceof AccessPathNil and + ap = TNil(getNodeType(node)) + or + exists(TypedContent tc | pathStoreStep(mid, node, pop(tc, ap), tc, cc)) and + sc = mid.getSummaryCtx() + or + exists(TypedContent tc | pathReadStep(mid, node, push(tc, ap), tc, cc)) and + sc = mid.getSummaryCtx() + or + pathIntoCallable(mid, node, _, cc, sc, _) and ap = mid.getAp() + or + pathOutOfCallable(mid, node, cc) and ap = mid.getAp() and sc instanceof SummaryCtxNone + or + pathThroughCallable(mid, node, cc, ap) and sc = mid.getSummaryCtx() +} + +pragma[nomagic] +private predicate readCand(Node node1, TypedContent tc, Node node2, Configuration config) { + readCandFwd(node1, tc, _, node2, config) and + flow(node2, config) +} + +pragma[nomagic] +private predicate pathReadStep( + PathNodeMid mid, Node node, AccessPath ap0, TypedContent tc, CallContext cc +) { + ap0 = mid.getAp() and + readCand(mid.getNode(), tc, node, mid.getConfiguration()) and + cc = mid.getCallContext() +} + +pragma[nomagic] +private predicate storeCand(Node node1, TypedContent tc, Node node2, Configuration config) { + storeCand2(node1, tc, node2, _, config) and + flow(node2, config) +} + +pragma[nomagic] +private predicate pathStoreStep( + PathNodeMid mid, Node node, AccessPath ap0, TypedContent tc, CallContext cc +) { + ap0 = mid.getAp() and + storeCand(mid.getNode(), tc, node, mid.getConfiguration()) and + cc = mid.getCallContext() +} + +private predicate pathOutOfCallable0( + PathNodeMid mid, ReturnPosition pos, CallContext innercc, AccessPath ap, Configuration config +) { + pos = getReturnPosition(mid.getNode()) and + innercc = mid.getCallContext() and + not innercc instanceof CallContextCall and + ap = mid.getAp() and + config = mid.getConfiguration() +} + +pragma[nomagic] +private predicate pathOutOfCallable1( + PathNodeMid mid, DataFlowCall call, ReturnKindExt kind, CallContext cc, AccessPath ap, + Configuration config +) { + exists(ReturnPosition pos, DataFlowCallable c, CallContext innercc | + pathOutOfCallable0(mid, pos, innercc, ap, config) and + c = pos.getCallable() and + kind = pos.getKind() and + resolveReturn(innercc, c, call) + | + if reducedViableImplInReturn(c, call) then cc = TReturn(c, call) else cc = TAnyCallContext() + ) +} + +pragma[noinline] +private Node getAnOutNodeFlow( + ReturnKindExt kind, DataFlowCall call, AccessPath ap, Configuration config +) { + result = kind.getAnOutNode(call) and + flow(result, _, _, ap, config) +} + +/** + * Holds if data may flow from `mid` to `out`. The last step of this path + * is a return from a callable and is recorded by `cc`, if needed. + */ +pragma[noinline] +private predicate pathOutOfCallable(PathNodeMid mid, Node out, CallContext cc) { + exists(ReturnKindExt kind, DataFlowCall call, AccessPath ap, Configuration config | + pathOutOfCallable1(mid, call, kind, cc, ap, config) + | + out = getAnOutNodeFlow(kind, call, ap, config) + ) +} + +/** + * Holds if data may flow from `mid` to the `i`th argument of `call` in `cc`. + */ +pragma[noinline] +private predicate pathIntoArg( + PathNodeMid mid, int i, CallContext cc, DataFlowCall call, AccessPath ap +) { + exists(ArgumentNode arg | + arg = mid.getNode() and + cc = mid.getCallContext() and + arg.argumentOf(call, i) and + ap = mid.getAp() + ) +} + +pragma[noinline] +private predicate parameterCand( + DataFlowCallable callable, int i, AccessPath ap, Configuration config +) { + exists(ParameterNode p | + flow(p, _, _, ap, config) and + p.isParameterOf(callable, i) + ) +} + +pragma[nomagic] +private predicate pathIntoCallable0( + PathNodeMid mid, DataFlowCallable callable, int i, CallContext outercc, DataFlowCall call, + AccessPath ap +) { + pathIntoArg(mid, i, outercc, call, ap) and + callable = resolveCall(call, outercc) and + parameterCand(callable, any(int j | j <= i and j >= i), ap, mid.getConfiguration()) +} + +/** + * Holds if data may flow from `mid` to `p` through `call`. The contexts + * before and after entering the callable are `outercc` and `innercc`, + * respectively. + */ +private predicate pathIntoCallable( + PathNodeMid mid, ParameterNode p, CallContext outercc, CallContextCall innercc, SummaryCtx sc, + DataFlowCall call +) { + exists(int i, DataFlowCallable callable, AccessPath ap | + pathIntoCallable0(mid, callable, i, outercc, call, ap) and + p.isParameterOf(callable, i) and + ( + sc = TSummaryCtxSome(p, ap) + or + not exists(TSummaryCtxSome(p, ap)) and + sc = TSummaryCtxNone() + ) + | + if recordDataFlowCallSite(call, callable) + then innercc = TSpecificCall(call) + else innercc = TSomeCall() + ) +} + +/** Holds if data may flow from a parameter given by `sc` to a return of kind `kind`. */ +pragma[nomagic] +private predicate paramFlowsThrough( + ReturnKindExt kind, CallContextCall cc, SummaryCtxSome sc, AccessPath ap, Configuration config +) { + exists(PathNodeMid mid, ReturnNodeExt ret, int pos | + mid.getNode() = ret and + kind = ret.getKind() and + cc = mid.getCallContext() and + sc = mid.getSummaryCtx() and + config = mid.getConfiguration() and + ap = mid.getAp() and + pos = sc.getParameterPos() and + not kind.(ParamUpdateReturnKind).getPosition() = pos + ) +} + +pragma[nomagic] +private predicate pathThroughCallable0( + DataFlowCall call, PathNodeMid mid, ReturnKindExt kind, CallContext cc, AccessPath ap +) { + exists(CallContext innercc, SummaryCtx sc | + pathIntoCallable(mid, _, cc, innercc, sc, call) and + paramFlowsThrough(kind, innercc, sc, ap, unbind(mid.getConfiguration())) + ) +} + +/** + * Holds if data may flow from `mid` through a callable to the node `out`. + * The context `cc` is restored to its value prior to entering the callable. + */ +pragma[noinline] +private predicate pathThroughCallable(PathNodeMid mid, Node out, CallContext cc, AccessPath ap) { + exists(DataFlowCall call, ReturnKindExt kind | + pathThroughCallable0(call, mid, kind, cc, ap) and + out = getAnOutNodeFlow(kind, call, ap, mid.getConfiguration()) + ) +} + +/** + * Holds if data can flow (inter-procedurally) from `source` to `sink`. + * + * Will only have results if `configuration` has non-empty sources and + * sinks. + */ +private predicate flowsTo( + PathNode flowsource, PathNodeSink flowsink, Node source, Node sink, Configuration configuration +) { + flowsource.isSource() and + flowsource.getConfiguration() = configuration and + flowsource.getNode() = source and + (flowsource = flowsink or pathSuccPlus(flowsource, flowsink)) and + flowsink.getNode() = sink +} + +/** + * Holds if data can flow (inter-procedurally) from `source` to `sink`. + * + * Will only have results if `configuration` has non-empty sources and + * sinks. + */ +predicate flowsTo(Node source, Node sink, Configuration configuration) { + flowsTo(_, _, source, sink, configuration) +} + +private module FlowExploration { + private predicate callableStep(DataFlowCallable c1, DataFlowCallable c2, Configuration config) { + exists(Node node1, Node node2 | + jumpStep(node1, node2, config) + or + additionalJumpStep(node1, node2, config) + or + // flow into callable + viableParamArg(_, node2, node1) + or + // flow out of a callable + viableReturnPosOut(_, getReturnPosition(node1), node2) + | + c1 = node1.getEnclosingCallable() and + c2 = node2.getEnclosingCallable() and + c1 != c2 + ) + } + + private predicate interestingCallableSrc(DataFlowCallable c, Configuration config) { + exists(Node n | config.isSource(n) and c = n.getEnclosingCallable()) + or + exists(DataFlowCallable mid | + interestingCallableSrc(mid, config) and callableStep(mid, c, config) + ) + } + + private newtype TCallableExt = + TCallable(DataFlowCallable c, Configuration config) { interestingCallableSrc(c, config) } or + TCallableSrc() + + private predicate callableExtSrc(TCallableSrc src) { any() } + + private predicate callableExtStepFwd(TCallableExt ce1, TCallableExt ce2) { + exists(DataFlowCallable c1, DataFlowCallable c2, Configuration config | + callableStep(c1, c2, config) and + ce1 = TCallable(c1, config) and + ce2 = TCallable(c2, unbind(config)) + ) + or + exists(Node n, Configuration config | + ce1 = TCallableSrc() and + config.isSource(n) and + ce2 = TCallable(n.getEnclosingCallable(), config) + ) + } + + private int distSrcExt(TCallableExt c) = + shortestDistances(callableExtSrc/1, callableExtStepFwd/2)(_, c, result) + + private int distSrc(DataFlowCallable c, Configuration config) { + result = distSrcExt(TCallable(c, config)) - 1 + } + + private newtype TPartialAccessPath = + TPartialNil(DataFlowType t) or + TPartialCons(TypedContent tc, int len) { len in [1 .. accessPathLimit()] } + + /** + * Conceptually a list of `TypedContent`s followed by a `Type`, but only the first + * element of the list and its length are tracked. If data flows from a source to + * a given node with a given `AccessPath`, this indicates the sequence of + * dereference operations needed to get from the value in the node to the + * tracked object. The final type indicates the type of the tracked object. + */ + private class PartialAccessPath extends TPartialAccessPath { + abstract string toString(); + + TypedContent getHead() { this = TPartialCons(result, _) } + + int len() { + this = TPartialNil(_) and result = 0 + or + this = TPartialCons(_, result) + } + + DataFlowType getType() { + this = TPartialNil(result) + or + exists(TypedContent head | this = TPartialCons(head, _) | result = head.getContainerType()) + } + + abstract AccessPathFront getFront(); + } + + private class PartialAccessPathNil extends PartialAccessPath, TPartialNil { + override string toString() { + exists(DataFlowType t | this = TPartialNil(t) | result = concat(": " + ppReprType(t))) + } + + override AccessPathFront getFront() { + exists(DataFlowType t | this = TPartialNil(t) | result = TFrontNil(t)) + } + } + + private class PartialAccessPathCons extends PartialAccessPath, TPartialCons { + override string toString() { + exists(TypedContent tc, int len | this = TPartialCons(tc, len) | + if len = 1 + then result = "[" + tc.toString() + "]" + else result = "[" + tc.toString() + ", ... (" + len.toString() + ")]" + ) + } + + override AccessPathFront getFront() { + exists(TypedContent tc | this = TPartialCons(tc, _) | result = TFrontHead(tc)) + } + } + + private newtype TSummaryCtx1 = + TSummaryCtx1None() or + TSummaryCtx1Param(ParameterNode p) + + private newtype TSummaryCtx2 = + TSummaryCtx2None() or + TSummaryCtx2Some(PartialAccessPath ap) + + private newtype TPartialPathNode = + TPartialPathNodeMk( + Node node, CallContext cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2, PartialAccessPath ap, + Configuration config + ) { + config.isSource(node) and + cc instanceof CallContextAny and + sc1 = TSummaryCtx1None() and + sc2 = TSummaryCtx2None() and + ap = TPartialNil(getNodeType(node)) and + not fullBarrier(node, config) and + exists(config.explorationLimit()) + or + partialPathNodeMk0(node, cc, sc1, sc2, ap, config) and + distSrc(node.getEnclosingCallable(), config) <= config.explorationLimit() + } + + pragma[nomagic] + private predicate partialPathNodeMk0( + Node node, CallContext cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2, PartialAccessPath ap, + Configuration config + ) { + exists(PartialPathNode mid | + partialPathStep(mid, node, cc, sc1, sc2, ap, config) and + not fullBarrier(node, config) and + if node instanceof CastingNode + then compatibleTypes(getNodeType(node), ap.getType()) + else any() + ) + } + + /** + * A `Node` augmented with a call context, an access path, and a configuration. + */ + class PartialPathNode extends TPartialPathNode { + /** Gets a textual representation of this element. */ + string toString() { result = this.getNode().toString() + this.ppAp() } + + /** + * Gets a textual representation of this element, including a textual + * representation of the call context. + */ + string toStringWithContext() { result = this.getNode().toString() + this.ppAp() + this.ppCtx() } + + /** + * Holds if this element is at the specified location. + * The location spans column `startcolumn` of line `startline` to + * column `endcolumn` of line `endline` in file `filepath`. + * For more information, see + * [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html). + */ + predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + this.getNode().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } + + /** Gets the underlying `Node`. */ + Node getNode() { none() } + + /** Gets the associated configuration. */ + Configuration getConfiguration() { none() } + + /** Gets a successor of this node, if any. */ + PartialPathNode getASuccessor() { none() } + + /** + * Gets the approximate distance to the nearest source measured in number + * of interprocedural steps. + */ + int getSourceDistance() { + result = distSrc(this.getNode().getEnclosingCallable(), this.getConfiguration()) + } + + private string ppAp() { + exists(string s | s = this.(PartialPathNodePriv).getAp().toString() | + if s = "" then result = "" else result = " " + s + ) + } + + private string ppCtx() { + result = " <" + this.(PartialPathNodePriv).getCallContext().toString() + ">" + } + } + + /** + * Provides the query predicates needed to include a graph in a path-problem query. + */ + module PartialPathGraph { + /** Holds if `(a,b)` is an edge in the graph of data flow path explanations. */ + query predicate edges(PartialPathNode a, PartialPathNode b) { a.getASuccessor() = b } + } + + private class PartialPathNodePriv extends PartialPathNode { + Node node; + CallContext cc; + TSummaryCtx1 sc1; + TSummaryCtx2 sc2; + PartialAccessPath ap; + Configuration config; + + PartialPathNodePriv() { this = TPartialPathNodeMk(node, cc, sc1, sc2, ap, config) } + + override Node getNode() { result = node } + + CallContext getCallContext() { result = cc } + + TSummaryCtx1 getSummaryCtx1() { result = sc1 } + + TSummaryCtx2 getSummaryCtx2() { result = sc2 } + + PartialAccessPath getAp() { result = ap } + + override Configuration getConfiguration() { result = config } + + override PartialPathNodePriv getASuccessor() { + partialPathStep(this, result.getNode(), result.getCallContext(), result.getSummaryCtx1(), + result.getSummaryCtx2(), result.getAp(), result.getConfiguration()) + } + } + + private predicate partialPathStep( + PartialPathNodePriv mid, Node node, CallContext cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2, + PartialAccessPath ap, Configuration config + ) { + not isUnreachableInCall(node, cc.(CallContextSpecificCall).getCall()) and + ( + localFlowStep(mid.getNode(), node, config) and + cc = mid.getCallContext() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + ap = mid.getAp() and + config = mid.getConfiguration() + or + additionalLocalFlowStep(mid.getNode(), node, config) and + cc = mid.getCallContext() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + mid.getAp() instanceof PartialAccessPathNil and + ap = TPartialNil(getNodeType(node)) and + config = mid.getConfiguration() + ) + or + jumpStep(mid.getNode(), node, config) and + cc instanceof CallContextAny and + sc1 = TSummaryCtx1None() and + sc2 = TSummaryCtx2None() and + ap = mid.getAp() and + config = mid.getConfiguration() + or + additionalJumpStep(mid.getNode(), node, config) and + cc instanceof CallContextAny and + sc1 = TSummaryCtx1None() and + sc2 = TSummaryCtx2None() and + mid.getAp() instanceof PartialAccessPathNil and + ap = TPartialNil(getNodeType(node)) and + config = mid.getConfiguration() + or + partialPathStoreStep(mid, _, _, node, ap) and + cc = mid.getCallContext() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + config = mid.getConfiguration() + or + exists(PartialAccessPath ap0, TypedContent tc | + partialPathReadStep(mid, ap0, tc, node, cc, config) and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + apConsFwd(ap, tc, ap0, config) and + compatibleTypes(ap.getType(), getNodeType(node)) + ) + or + partialPathIntoCallable(mid, node, _, cc, sc1, sc2, _, ap, config) + or + partialPathOutOfCallable(mid, node, cc, ap, config) and + sc1 = TSummaryCtx1None() and + sc2 = TSummaryCtx2None() + or + partialPathThroughCallable(mid, node, cc, ap, config) and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() + } + + bindingset[result, i] + private int unbindInt(int i) { i <= result and i >= result } + + pragma[inline] + private predicate partialPathStoreStep( + PartialPathNodePriv mid, PartialAccessPath ap1, TypedContent tc, Node node, + PartialAccessPath ap2 + ) { + exists(Node midNode, DataFlowType contentType | + midNode = mid.getNode() and + ap1 = mid.getAp() and + store(midNode, tc, node, contentType) and + ap2.getHead() = tc and + ap2.len() = unbindInt(ap1.len() + 1) and + compatibleTypes(ap1.getType(), contentType) + ) + } + + pragma[nomagic] + private predicate apConsFwd( + PartialAccessPath ap1, TypedContent tc, PartialAccessPath ap2, Configuration config + ) { + exists(PartialPathNodePriv mid | + partialPathStoreStep(mid, ap1, tc, _, ap2) and + config = mid.getConfiguration() + ) + } + + pragma[nomagic] + private predicate partialPathReadStep( + PartialPathNodePriv mid, PartialAccessPath ap, TypedContent tc, Node node, CallContext cc, + Configuration config + ) { + exists(Node midNode | + midNode = mid.getNode() and + ap = mid.getAp() and + read(midNode, tc.getContent(), node) and + ap.getHead() = tc and + config = mid.getConfiguration() and + cc = mid.getCallContext() + ) + } + + private predicate partialPathOutOfCallable0( + PartialPathNodePriv mid, ReturnPosition pos, CallContext innercc, PartialAccessPath ap, + Configuration config + ) { + pos = getReturnPosition(mid.getNode()) and + innercc = mid.getCallContext() and + not innercc instanceof CallContextCall and + ap = mid.getAp() and + config = mid.getConfiguration() + } + + pragma[nomagic] + private predicate partialPathOutOfCallable1( + PartialPathNodePriv mid, DataFlowCall call, ReturnKindExt kind, CallContext cc, + PartialAccessPath ap, Configuration config + ) { + exists(ReturnPosition pos, DataFlowCallable c, CallContext innercc | + partialPathOutOfCallable0(mid, pos, innercc, ap, config) and + c = pos.getCallable() and + kind = pos.getKind() and + resolveReturn(innercc, c, call) + | + if reducedViableImplInReturn(c, call) then cc = TReturn(c, call) else cc = TAnyCallContext() + ) + } + + private predicate partialPathOutOfCallable( + PartialPathNodePriv mid, Node out, CallContext cc, PartialAccessPath ap, Configuration config + ) { + exists(ReturnKindExt kind, DataFlowCall call | + partialPathOutOfCallable1(mid, call, kind, cc, ap, config) + | + out = kind.getAnOutNode(call) + ) + } + + pragma[noinline] + private predicate partialPathIntoArg( + PartialPathNodePriv mid, int i, CallContext cc, DataFlowCall call, PartialAccessPath ap, + Configuration config + ) { + exists(ArgumentNode arg | + arg = mid.getNode() and + cc = mid.getCallContext() and + arg.argumentOf(call, i) and + ap = mid.getAp() and + config = mid.getConfiguration() + ) + } + + pragma[nomagic] + private predicate partialPathIntoCallable0( + PartialPathNodePriv mid, DataFlowCallable callable, int i, CallContext outercc, + DataFlowCall call, PartialAccessPath ap, Configuration config + ) { + partialPathIntoArg(mid, i, outercc, call, ap, config) and + callable = resolveCall(call, outercc) + } + + private predicate partialPathIntoCallable( + PartialPathNodePriv mid, ParameterNode p, CallContext outercc, CallContextCall innercc, + TSummaryCtx1 sc1, TSummaryCtx2 sc2, DataFlowCall call, PartialAccessPath ap, + Configuration config + ) { + exists(int i, DataFlowCallable callable | + partialPathIntoCallable0(mid, callable, i, outercc, call, ap, config) and + p.isParameterOf(callable, i) and + sc1 = TSummaryCtx1Param(p) and + sc2 = TSummaryCtx2Some(ap) + | + if recordDataFlowCallSite(call, callable) + then innercc = TSpecificCall(call) + else innercc = TSomeCall() + ) + } + + pragma[nomagic] + private predicate paramFlowsThroughInPartialPath( + ReturnKindExt kind, CallContextCall cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2, + PartialAccessPath ap, Configuration config + ) { + exists(PartialPathNodePriv mid, ReturnNodeExt ret | + mid.getNode() = ret and + kind = ret.getKind() and + cc = mid.getCallContext() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + config = mid.getConfiguration() and + ap = mid.getAp() + ) + } + + pragma[noinline] + private predicate partialPathThroughCallable0( + DataFlowCall call, PartialPathNodePriv mid, ReturnKindExt kind, CallContext cc, + PartialAccessPath ap, Configuration config + ) { + exists(ParameterNode p, CallContext innercc, TSummaryCtx1 sc1, TSummaryCtx2 sc2 | + partialPathIntoCallable(mid, p, cc, innercc, sc1, sc2, call, _, config) and + paramFlowsThroughInPartialPath(kind, innercc, sc1, sc2, ap, config) + ) + } + + private predicate partialPathThroughCallable( + PartialPathNodePriv mid, Node out, CallContext cc, PartialAccessPath ap, Configuration config + ) { + exists(DataFlowCall call, ReturnKindExt kind | + partialPathThroughCallable0(call, mid, kind, cc, ap, config) and + out = kind.getAnOutNode(call) + ) + } +} + +import FlowExploration + +private predicate partialFlow( + PartialPathNode source, PartialPathNode node, Configuration configuration +) { + source.getConfiguration() = configuration and + configuration.isSource(source.getNode()) and + node = source.getASuccessor+() +} diff --git a/python/ql/src/experimental/dataflow/internal/DataFlowImplCommon.qll b/python/ql/src/experimental/dataflow/internal/DataFlowImplCommon.qll new file mode 100644 index 000000000000..27ab1d01feba --- /dev/null +++ b/python/ql/src/experimental/dataflow/internal/DataFlowImplCommon.qll @@ -0,0 +1,812 @@ +private import DataFlowImplSpecific::Private +private import DataFlowImplSpecific::Public +import Cached + +cached +private module Cached { + /** + * Holds if `p` is the `i`th parameter of a viable dispatch target of `call`. + * The instance parameter is considered to have index `-1`. + */ + pragma[nomagic] + private predicate viableParam(DataFlowCall call, int i, ParameterNode p) { + p.isParameterOf(viableCallable(call), i) + } + + /** + * Holds if `arg` is a possible argument to `p` in `call`, taking virtual + * dispatch into account. + */ + cached + predicate viableParamArg(DataFlowCall call, ParameterNode p, ArgumentNode arg) { + exists(int i | + viableParam(call, i, p) and + arg.argumentOf(call, i) and + compatibleTypes(getNodeType(arg), getNodeType(p)) + ) + } + + pragma[nomagic] + private ReturnPosition viableReturnPos(DataFlowCall call, ReturnKindExt kind) { + viableCallable(call) = result.getCallable() and + kind = result.getKind() + } + + /** + * Holds if a value at return position `pos` can be returned to `out` via `call`, + * taking virtual dispatch into account. + */ + cached + predicate viableReturnPosOut(DataFlowCall call, ReturnPosition pos, Node out) { + exists(ReturnKindExt kind | + pos = viableReturnPos(call, kind) and + out = kind.getAnOutNode(call) + ) + } + + /** Provides predicates for calculating flow-through summaries. */ + private module FlowThrough { + /** + * The first flow-through approximation: + * + * - Input access paths are abstracted with a Boolean parameter + * that indicates (non-)emptiness. + */ + private module Cand { + /** + * Holds if `p` can flow to `node` in the same callable using only + * value-preserving steps. + * + * `read` indicates whether it is contents of `p` that can flow to `node`. + */ + pragma[nomagic] + private predicate parameterValueFlowCand(ParameterNode p, Node node, boolean read) { + p = node and + read = false + or + // local flow + exists(Node mid | + parameterValueFlowCand(p, mid, read) and + simpleLocalFlowStep(mid, node) + ) + or + // read + exists(Node mid | + parameterValueFlowCand(p, mid, false) and + readStep(mid, _, node) and + read = true + ) + or + // flow through: no prior read + exists(ArgumentNode arg | + parameterValueFlowArgCand(p, arg, false) and + argumentValueFlowsThroughCand(arg, node, read) + ) + or + // flow through: no read inside method + exists(ArgumentNode arg | + parameterValueFlowArgCand(p, arg, read) and + argumentValueFlowsThroughCand(arg, node, false) + ) + } + + pragma[nomagic] + private predicate parameterValueFlowArgCand(ParameterNode p, ArgumentNode arg, boolean read) { + parameterValueFlowCand(p, arg, read) + } + + pragma[nomagic] + predicate parameterValueFlowsToPreUpdateCand(ParameterNode p, PostUpdateNode n) { + parameterValueFlowCand(p, n.getPreUpdateNode(), false) + } + + /** + * Holds if `p` can flow to a return node of kind `kind` in the same + * callable using only value-preserving steps, not taking call contexts + * into account. + * + * `read` indicates whether it is contents of `p` that can flow to the return + * node. + */ + predicate parameterValueFlowReturnCand(ParameterNode p, ReturnKind kind, boolean read) { + exists(ReturnNode ret | + parameterValueFlowCand(p, ret, read) and + kind = ret.getKind() + ) + } + + pragma[nomagic] + private predicate argumentValueFlowsThroughCand0( + DataFlowCall call, ArgumentNode arg, ReturnKind kind, boolean read + ) { + exists(ParameterNode param | viableParamArg(call, param, arg) | + parameterValueFlowReturnCand(param, kind, read) + ) + } + + /** + * Holds if `arg` flows to `out` through a call using only value-preserving steps, + * not taking call contexts into account. + * + * `read` indicates whether it is contents of `arg` that can flow to `out`. + */ + predicate argumentValueFlowsThroughCand(ArgumentNode arg, Node out, boolean read) { + exists(DataFlowCall call, ReturnKind kind | + argumentValueFlowsThroughCand0(call, arg, kind, read) and + out = getAnOutNode(call, kind) + ) + } + + predicate cand(ParameterNode p, Node n) { + parameterValueFlowCand(p, n, _) and + ( + parameterValueFlowReturnCand(p, _, _) + or + parameterValueFlowsToPreUpdateCand(p, _) + ) + } + } + + /** + * The final flow-through calculation: + * + * - Calculated flow is either value-preserving (`read = TReadStepTypesNone()`) + * or summarized as a single read step with before and after types recorded + * in the `ReadStepTypesOption` parameter. + * - Types are checked using the `compatibleTypes()` relation. + */ + private module Final { + /** + * Holds if `p` can flow to `node` in the same callable using only + * value-preserving steps and possibly a single read step, not taking + * call contexts into account. + * + * If a read step was taken, then `read` captures the `Content`, the + * container type, and the content type. + */ + predicate parameterValueFlow(ParameterNode p, Node node, ReadStepTypesOption read) { + parameterValueFlow0(p, node, read) and + if node instanceof CastingNode + then + // normal flow through + read = TReadStepTypesNone() and + compatibleTypes(getNodeType(p), getNodeType(node)) + or + // getter + compatibleTypes(read.getContentType(), getNodeType(node)) + else any() + } + + pragma[nomagic] + private predicate parameterValueFlow0(ParameterNode p, Node node, ReadStepTypesOption read) { + p = node and + Cand::cand(p, _) and + read = TReadStepTypesNone() + or + // local flow + exists(Node mid | + parameterValueFlow(p, mid, read) and + simpleLocalFlowStep(mid, node) + ) + or + // read + exists(Node mid | + parameterValueFlow(p, mid, TReadStepTypesNone()) and + readStepWithTypes(mid, read.getContainerType(), read.getContent(), node, + read.getContentType()) and + Cand::parameterValueFlowReturnCand(p, _, true) and + compatibleTypes(getNodeType(p), read.getContainerType()) + ) + or + parameterValueFlow0_0(TReadStepTypesNone(), p, node, read) + } + + pragma[nomagic] + private predicate parameterValueFlow0_0( + ReadStepTypesOption mustBeNone, ParameterNode p, Node node, ReadStepTypesOption read + ) { + // flow through: no prior read + exists(ArgumentNode arg | + parameterValueFlowArg(p, arg, mustBeNone) and + argumentValueFlowsThrough(arg, read, node) + ) + or + // flow through: no read inside method + exists(ArgumentNode arg | + parameterValueFlowArg(p, arg, read) and + argumentValueFlowsThrough(arg, mustBeNone, node) + ) + } + + pragma[nomagic] + private predicate parameterValueFlowArg( + ParameterNode p, ArgumentNode arg, ReadStepTypesOption read + ) { + parameterValueFlow(p, arg, read) and + Cand::argumentValueFlowsThroughCand(arg, _, _) + } + + pragma[nomagic] + private predicate argumentValueFlowsThrough0( + DataFlowCall call, ArgumentNode arg, ReturnKind kind, ReadStepTypesOption read + ) { + exists(ParameterNode param | viableParamArg(call, param, arg) | + parameterValueFlowReturn(param, kind, read) + ) + } + + /** + * Holds if `arg` flows to `out` through a call using only + * value-preserving steps and possibly a single read step, not taking + * call contexts into account. + * + * If a read step was taken, then `read` captures the `Content`, the + * container type, and the content type. + */ + pragma[nomagic] + predicate argumentValueFlowsThrough(ArgumentNode arg, ReadStepTypesOption read, Node out) { + exists(DataFlowCall call, ReturnKind kind | + argumentValueFlowsThrough0(call, arg, kind, read) and + out = getAnOutNode(call, kind) + | + // normal flow through + read = TReadStepTypesNone() and + compatibleTypes(getNodeType(arg), getNodeType(out)) + or + // getter + compatibleTypes(getNodeType(arg), read.getContainerType()) and + compatibleTypes(read.getContentType(), getNodeType(out)) + ) + } + + /** + * Holds if `arg` flows to `out` through a call using only + * value-preserving steps and a single read step, not taking call + * contexts into account, thus representing a getter-step. + */ + predicate getterStep(ArgumentNode arg, Content c, Node out) { + argumentValueFlowsThrough(arg, TReadStepTypesSome(_, c, _), out) + } + + /** + * Holds if `p` can flow to a return node of kind `kind` in the same + * callable using only value-preserving steps and possibly a single read + * step. + * + * If a read step was taken, then `read` captures the `Content`, the + * container type, and the content type. + */ + private predicate parameterValueFlowReturn( + ParameterNode p, ReturnKind kind, ReadStepTypesOption read + ) { + exists(ReturnNode ret | + parameterValueFlow(p, ret, read) and + kind = ret.getKind() + ) + } + } + + import Final + } + + import FlowThrough + + cached + private module DispatchWithCallContext { + /** + * Holds if the call context `ctx` reduces the set of viable run-time + * dispatch targets of call `call` in `c`. + */ + cached + predicate reducedViableImplInCallContext(DataFlowCall call, DataFlowCallable c, DataFlowCall ctx) { + exists(int tgts, int ctxtgts | + mayBenefitFromCallContext(call, c) and + c = viableCallable(ctx) and + ctxtgts = count(viableImplInCallContext(call, ctx)) and + tgts = strictcount(viableCallable(call)) and + ctxtgts < tgts + ) + } + + /** + * Gets a viable run-time dispatch target for the call `call` in the + * context `ctx`. This is restricted to those calls for which a context + * makes a difference. + */ + cached + DataFlowCallable prunedViableImplInCallContext(DataFlowCall call, DataFlowCall ctx) { + result = viableImplInCallContext(call, ctx) and + reducedViableImplInCallContext(call, _, ctx) + } + + /** + * Holds if flow returning from callable `c` to call `call` might return + * further and if this path restricts the set of call sites that can be + * returned to. + */ + cached + predicate reducedViableImplInReturn(DataFlowCallable c, DataFlowCall call) { + exists(int tgts, int ctxtgts | + mayBenefitFromCallContext(call, _) and + c = viableCallable(call) and + ctxtgts = count(DataFlowCall ctx | c = viableImplInCallContext(call, ctx)) and + tgts = strictcount(DataFlowCall ctx | viableCallable(ctx) = call.getEnclosingCallable()) and + ctxtgts < tgts + ) + } + + /** + * Gets a viable run-time dispatch target for the call `call` in the + * context `ctx`. This is restricted to those calls and results for which + * the return flow from the result to `call` restricts the possible context + * `ctx`. + */ + cached + DataFlowCallable prunedViableImplInCallContextReverse(DataFlowCall call, DataFlowCall ctx) { + result = viableImplInCallContext(call, ctx) and + reducedViableImplInReturn(result, call) + } + } + + import DispatchWithCallContext + + /** + * Holds if `p` can flow to the pre-update node associated with post-update + * node `n`, in the same callable, using only value-preserving steps. + */ + cached + predicate parameterValueFlowsToPreUpdate(ParameterNode p, PostUpdateNode n) { + parameterValueFlow(p, n.getPreUpdateNode(), TReadStepTypesNone()) + } + + private predicate store( + Node node1, Content c, Node node2, DataFlowType contentType, DataFlowType containerType + ) { + storeStep(node1, c, node2) and + readStep(_, c, _) and + contentType = getNodeType(node1) and + containerType = getNodeType(node2) + or + exists(Node n1, Node n2 | + n1 = node1.(PostUpdateNode).getPreUpdateNode() and + n2 = node2.(PostUpdateNode).getPreUpdateNode() + | + argumentValueFlowsThrough(n2, TReadStepTypesSome(containerType, c, contentType), n1) + or + readStep(n2, c, n1) and + contentType = getNodeType(n1) and + containerType = getNodeType(n2) + ) + } + + /** + * Holds if data can flow from `node1` to `node2` via a direct assignment to + * `f`. + * + * This includes reverse steps through reads when the result of the read has + * been stored into, in order to handle cases like `x.f1.f2 = y`. + */ + cached + predicate store(Node node1, TypedContent tc, Node node2, DataFlowType contentType) { + store(node1, tc.getContent(), node2, contentType, tc.getContainerType()) + } + + /** + * Holds if the call context `call` either improves virtual dispatch in + * `callable` or if it allows us to prune unreachable nodes in `callable`. + */ + cached + predicate recordDataFlowCallSite(DataFlowCall call, DataFlowCallable callable) { + reducedViableImplInCallContext(_, callable, call) + or + exists(Node n | n.getEnclosingCallable() = callable | isUnreachableInCall(n, call)) + } + + cached + newtype TCallContext = + TAnyCallContext() or + TSpecificCall(DataFlowCall call) { recordDataFlowCallSite(call, _) } or + TSomeCall() or + TReturn(DataFlowCallable c, DataFlowCall call) { reducedViableImplInReturn(c, call) } + + cached + newtype TReturnPosition = + TReturnPosition0(DataFlowCallable c, ReturnKindExt kind) { + exists(ReturnNodeExt ret | + c = returnNodeGetEnclosingCallable(ret) and + kind = ret.getKind() + ) + } + + cached + newtype TLocalFlowCallContext = + TAnyLocalCall() or + TSpecificLocalCall(DataFlowCall call) { isUnreachableInCall(_, call) } + + cached + newtype TReturnKindExt = + TValueReturn(ReturnKind kind) or + TParamUpdate(int pos) { exists(ParameterNode p | p.isParameterOf(_, pos)) } + + cached + newtype TBooleanOption = + TBooleanNone() or + TBooleanSome(boolean b) { b = true or b = false } + + cached + newtype TTypedContent = MkTypedContent(Content c, DataFlowType t) { store(_, c, _, _, t) } + + cached + newtype TAccessPathFront = + TFrontNil(DataFlowType t) or + TFrontHead(TypedContent tc) + + cached + newtype TAccessPathFrontOption = + TAccessPathFrontNone() or + TAccessPathFrontSome(AccessPathFront apf) +} + +/** + * A `Node` at which a cast can occur such that the type should be checked. + */ +class CastingNode extends Node { + CastingNode() { + this instanceof ParameterNode or + this instanceof CastNode or + this instanceof OutNodeExt or + // For reads, `x.f`, we want to check that the tracked type after the read (which + // is obtained by popping the head of the access path stack) is compatible with + // the type of `x.f`. + readStep(_, _, this) + } +} + +private predicate readStepWithTypes( + Node n1, DataFlowType container, Content c, Node n2, DataFlowType content +) { + readStep(n1, c, n2) and + container = getNodeType(n1) and + content = getNodeType(n2) +} + +private newtype TReadStepTypesOption = + TReadStepTypesNone() or + TReadStepTypesSome(DataFlowType container, Content c, DataFlowType content) { + readStepWithTypes(_, container, c, _, content) + } + +private class ReadStepTypesOption extends TReadStepTypesOption { + predicate isSome() { this instanceof TReadStepTypesSome } + + DataFlowType getContainerType() { this = TReadStepTypesSome(result, _, _) } + + Content getContent() { this = TReadStepTypesSome(_, result, _) } + + DataFlowType getContentType() { this = TReadStepTypesSome(_, _, result) } + + string toString() { if this.isSome() then result = "Some(..)" else result = "None()" } +} + +/** + * A call context to restrict the targets of virtual dispatch, prune local flow, + * and match the call sites of flow into a method with flow out of a method. + * + * There are four cases: + * - `TAnyCallContext()` : No restrictions on method flow. + * - `TSpecificCall(DataFlowCall call)` : Flow entered through the + * given `call`. This call improves the set of viable + * dispatch targets for at least one method call in the current callable + * or helps prune unreachable nodes in the current callable. + * - `TSomeCall()` : Flow entered through a parameter. The + * originating call does not improve the set of dispatch targets for any + * method call in the current callable and was therefore not recorded. + * - `TReturn(Callable c, DataFlowCall call)` : Flow reached `call` from `c` and + * this dispatch target of `call` implies a reduced set of dispatch origins + * to which data may flow if it should reach a `return` statement. + */ +abstract class CallContext extends TCallContext { + abstract string toString(); + + /** Holds if this call context is relevant for `callable`. */ + abstract predicate relevantFor(DataFlowCallable callable); +} + +class CallContextAny extends CallContext, TAnyCallContext { + override string toString() { result = "CcAny" } + + override predicate relevantFor(DataFlowCallable callable) { any() } +} + +abstract class CallContextCall extends CallContext { } + +class CallContextSpecificCall extends CallContextCall, TSpecificCall { + override string toString() { + exists(DataFlowCall call | this = TSpecificCall(call) | result = "CcCall(" + call + ")") + } + + override predicate relevantFor(DataFlowCallable callable) { + recordDataFlowCallSite(getCall(), callable) + } + + DataFlowCall getCall() { this = TSpecificCall(result) } +} + +class CallContextSomeCall extends CallContextCall, TSomeCall { + override string toString() { result = "CcSomeCall" } + + override predicate relevantFor(DataFlowCallable callable) { + exists(ParameterNode p | p.getEnclosingCallable() = callable) + } +} + +class CallContextReturn extends CallContext, TReturn { + override string toString() { + exists(DataFlowCall call | this = TReturn(_, call) | result = "CcReturn(" + call + ")") + } + + override predicate relevantFor(DataFlowCallable callable) { + exists(DataFlowCall call | this = TReturn(_, call) and call.getEnclosingCallable() = callable) + } +} + +/** + * A call context that is relevant for pruning local flow. + */ +abstract class LocalCallContext extends TLocalFlowCallContext { + abstract string toString(); + + /** Holds if this call context is relevant for `callable`. */ + abstract predicate relevantFor(DataFlowCallable callable); +} + +class LocalCallContextAny extends LocalCallContext, TAnyLocalCall { + override string toString() { result = "LocalCcAny" } + + override predicate relevantFor(DataFlowCallable callable) { any() } +} + +class LocalCallContextSpecificCall extends LocalCallContext, TSpecificLocalCall { + LocalCallContextSpecificCall() { this = TSpecificLocalCall(call) } + + DataFlowCall call; + + DataFlowCall getCall() { result = call } + + override string toString() { result = "LocalCcCall(" + call + ")" } + + override predicate relevantFor(DataFlowCallable callable) { relevantLocalCCtx(call, callable) } +} + +private predicate relevantLocalCCtx(DataFlowCall call, DataFlowCallable callable) { + exists(Node n | n.getEnclosingCallable() = callable and isUnreachableInCall(n, call)) +} + +/** + * Gets the local call context given the call context and the callable that + * the contexts apply to. + */ +LocalCallContext getLocalCallContext(CallContext ctx, DataFlowCallable callable) { + ctx.relevantFor(callable) and + if relevantLocalCCtx(ctx.(CallContextSpecificCall).getCall(), callable) + then result.(LocalCallContextSpecificCall).getCall() = ctx.(CallContextSpecificCall).getCall() + else result instanceof LocalCallContextAny +} + +/** + * A node from which flow can return to the caller. This is either a regular + * `ReturnNode` or a `PostUpdateNode` corresponding to the value of a parameter. + */ +class ReturnNodeExt extends Node { + ReturnNodeExt() { + this instanceof ReturnNode or + parameterValueFlowsToPreUpdate(_, this) + } + + /** Gets the kind of this returned value. */ + ReturnKindExt getKind() { + result = TValueReturn(this.(ReturnNode).getKind()) + or + exists(ParameterNode p, int pos | + parameterValueFlowsToPreUpdate(p, this) and + p.isParameterOf(_, pos) and + result = TParamUpdate(pos) + ) + } +} + +/** + * A node to which data can flow from a call. Either an ordinary out node + * or a post-update node associated with a call argument. + */ +class OutNodeExt extends Node { + OutNodeExt() { + this instanceof OutNode + or + this.(PostUpdateNode).getPreUpdateNode() instanceof ArgumentNode + } +} + +/** + * An extended return kind. A return kind describes how data can be returned + * from a callable. This can either be through a returned value or an updated + * parameter. + */ +abstract class ReturnKindExt extends TReturnKindExt { + /** Gets a textual representation of this return kind. */ + abstract string toString(); + + /** Gets a node corresponding to data flow out of `call`. */ + abstract OutNodeExt getAnOutNode(DataFlowCall call); +} + +class ValueReturnKind extends ReturnKindExt, TValueReturn { + private ReturnKind kind; + + ValueReturnKind() { this = TValueReturn(kind) } + + ReturnKind getKind() { result = kind } + + override string toString() { result = kind.toString() } + + override OutNodeExt getAnOutNode(DataFlowCall call) { + result = getAnOutNode(call, this.getKind()) + } +} + +class ParamUpdateReturnKind extends ReturnKindExt, TParamUpdate { + private int pos; + + ParamUpdateReturnKind() { this = TParamUpdate(pos) } + + int getPosition() { result = pos } + + override string toString() { result = "param update " + pos } + + override OutNodeExt getAnOutNode(DataFlowCall call) { + exists(ArgumentNode arg | + result.(PostUpdateNode).getPreUpdateNode() = arg and + arg.argumentOf(call, this.getPosition()) + ) + } +} + +/** A callable tagged with a relevant return kind. */ +class ReturnPosition extends TReturnPosition0 { + private DataFlowCallable c; + private ReturnKindExt kind; + + ReturnPosition() { this = TReturnPosition0(c, kind) } + + /** Gets the callable. */ + DataFlowCallable getCallable() { result = c } + + /** Gets the return kind. */ + ReturnKindExt getKind() { result = kind } + + /** Gets a textual representation of this return position. */ + string toString() { result = "[" + kind + "] " + c } +} + +pragma[noinline] +private DataFlowCallable returnNodeGetEnclosingCallable(ReturnNodeExt ret) { + result = ret.getEnclosingCallable() +} + +pragma[noinline] +private ReturnPosition getReturnPosition0(ReturnNodeExt ret, ReturnKindExt kind) { + result.getCallable() = returnNodeGetEnclosingCallable(ret) and + kind = result.getKind() +} + +pragma[noinline] +ReturnPosition getReturnPosition(ReturnNodeExt ret) { + result = getReturnPosition0(ret, ret.getKind()) +} + +bindingset[cc, callable] +predicate resolveReturn(CallContext cc, DataFlowCallable callable, DataFlowCall call) { + cc instanceof CallContextAny and callable = viableCallable(call) + or + exists(DataFlowCallable c0, DataFlowCall call0 | + call0.getEnclosingCallable() = callable and + cc = TReturn(c0, call0) and + c0 = prunedViableImplInCallContextReverse(call0, call) + ) +} + +bindingset[call, cc] +DataFlowCallable resolveCall(DataFlowCall call, CallContext cc) { + exists(DataFlowCall ctx | cc = TSpecificCall(ctx) | + if reducedViableImplInCallContext(call, _, ctx) + then result = prunedViableImplInCallContext(call, ctx) + else result = viableCallable(call) + ) + or + result = viableCallable(call) and cc instanceof CallContextSomeCall + or + result = viableCallable(call) and cc instanceof CallContextAny + or + result = viableCallable(call) and cc instanceof CallContextReturn +} + +predicate read = readStep/3; + +/** An optional Boolean value. */ +class BooleanOption extends TBooleanOption { + string toString() { + this = TBooleanNone() and result = "" + or + this = TBooleanSome(any(boolean b | result = b.toString())) + } +} + +/** Content tagged with the type of a containing object. */ +class TypedContent extends MkTypedContent { + private Content c; + private DataFlowType t; + + TypedContent() { this = MkTypedContent(c, t) } + + /** Gets the content. */ + Content getContent() { result = c } + + /** Gets the container type. */ + DataFlowType getContainerType() { result = t } + + /** Gets a textual representation of this content. */ + string toString() { result = c.toString() } +} + +/** + * The front of an access path. This is either a head or a nil. + */ +abstract class AccessPathFront extends TAccessPathFront { + abstract string toString(); + + abstract DataFlowType getType(); + + abstract boolean toBoolNonEmpty(); + + predicate headUsesContent(TypedContent tc) { this = TFrontHead(tc) } + + predicate isClearedAt(Node n) { + exists(TypedContent tc | + this.headUsesContent(tc) and + clearsContent(n, tc.getContent()) + ) + } +} + +class AccessPathFrontNil extends AccessPathFront, TFrontNil { + private DataFlowType t; + + AccessPathFrontNil() { this = TFrontNil(t) } + + override string toString() { result = ppReprType(t) } + + override DataFlowType getType() { result = t } + + override boolean toBoolNonEmpty() { result = false } +} + +class AccessPathFrontHead extends AccessPathFront, TFrontHead { + private TypedContent tc; + + AccessPathFrontHead() { this = TFrontHead(tc) } + + override string toString() { result = tc.toString() } + + override DataFlowType getType() { result = tc.getContainerType() } + + override boolean toBoolNonEmpty() { result = true } +} + +/** An optional access path front. */ +class AccessPathFrontOption extends TAccessPathFrontOption { + string toString() { + this = TAccessPathFrontNone() and result = "" + or + this = TAccessPathFrontSome(any(AccessPathFront apf | result = apf.toString())) + } +} diff --git a/python/ql/src/experimental/dataflow/internal/DataFlowImplConsistency.qll b/python/ql/src/experimental/dataflow/internal/DataFlowImplConsistency.qll new file mode 100644 index 000000000000..5bacc1385010 --- /dev/null +++ b/python/ql/src/experimental/dataflow/internal/DataFlowImplConsistency.qll @@ -0,0 +1,166 @@ +/** + * Provides consistency queries for checking invariants in the language-specific + * data-flow classes and predicates. + */ + +private import DataFlowImplSpecific::Private +private import DataFlowImplSpecific::Public +private import tainttracking1.TaintTrackingParameter::Private +private import tainttracking1.TaintTrackingParameter::Public + +module Consistency { + private class RelevantNode extends Node { + RelevantNode() { + this instanceof ArgumentNode or + this instanceof ParameterNode or + this instanceof ReturnNode or + this = getAnOutNode(_, _) or + simpleLocalFlowStep(this, _) or + simpleLocalFlowStep(_, this) or + jumpStep(this, _) or + jumpStep(_, this) or + storeStep(this, _, _) or + storeStep(_, _, this) or + readStep(this, _, _) or + readStep(_, _, this) or + defaultAdditionalTaintStep(this, _) or + defaultAdditionalTaintStep(_, this) + } + } + + query predicate uniqueEnclosingCallable(Node n, string msg) { + exists(int c | + n instanceof RelevantNode and + c = count(n.getEnclosingCallable()) and + c != 1 and + msg = "Node should have one enclosing callable but has " + c + "." + ) + } + + query predicate uniqueType(Node n, string msg) { + exists(int c | + n instanceof RelevantNode and + c = count(getNodeType(n)) and + c != 1 and + msg = "Node should have one type but has " + c + "." + ) + } + + query predicate uniqueNodeLocation(Node n, string msg) { + exists(int c | + c = + count(string filepath, int startline, int startcolumn, int endline, int endcolumn | + n.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + ) and + c != 1 and + msg = "Node should have one location but has " + c + "." + ) + } + + query predicate missingLocation(string msg) { + exists(int c | + c = + strictcount(Node n | + not exists(string filepath, int startline, int startcolumn, int endline, int endcolumn | + n.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + ) + ) and + msg = "Nodes without location: " + c + ) + } + + query predicate uniqueNodeToString(Node n, string msg) { + exists(int c | + c = count(n.toString()) and + c != 1 and + msg = "Node should have one toString but has " + c + "." + ) + } + + query predicate missingToString(string msg) { + exists(int c | + c = strictcount(Node n | not exists(n.toString())) and + msg = "Nodes without toString: " + c + ) + } + + query predicate parameterCallable(ParameterNode p, string msg) { + exists(DataFlowCallable c | p.isParameterOf(c, _) and c != p.getEnclosingCallable()) and + msg = "Callable mismatch for parameter." + } + + query predicate localFlowIsLocal(Node n1, Node n2, string msg) { + simpleLocalFlowStep(n1, n2) and + n1.getEnclosingCallable() != n2.getEnclosingCallable() and + msg = "Local flow step does not preserve enclosing callable." + } + + private DataFlowType typeRepr() { result = getNodeType(_) } + + query predicate compatibleTypesReflexive(DataFlowType t, string msg) { + t = typeRepr() and + not compatibleTypes(t, t) and + msg = "Type compatibility predicate is not reflexive." + } + + query predicate unreachableNodeCCtx(Node n, DataFlowCall call, string msg) { + isUnreachableInCall(n, call) and + exists(DataFlowCallable c | + c = n.getEnclosingCallable() and + not viableCallable(call) = c + ) and + msg = "Call context for isUnreachableInCall is inconsistent with call graph." + } + + query predicate localCallNodes(DataFlowCall call, Node n, string msg) { + ( + n = getAnOutNode(call, _) and + msg = "OutNode and call does not share enclosing callable." + or + n.(ArgumentNode).argumentOf(call, _) and + msg = "ArgumentNode and call does not share enclosing callable." + ) and + n.getEnclosingCallable() != call.getEnclosingCallable() + } + + query predicate postIsNotPre(PostUpdateNode n, string msg) { + n.getPreUpdateNode() = n and msg = "PostUpdateNode should not equal its pre-update node." + } + + query predicate postHasUniquePre(PostUpdateNode n, string msg) { + exists(int c | + c = count(n.getPreUpdateNode()) and + c != 1 and + msg = "PostUpdateNode should have one pre-update node but has " + c + "." + ) + } + + query predicate uniquePostUpdate(Node n, string msg) { + 1 < strictcount(PostUpdateNode post | post.getPreUpdateNode() = n) and + msg = "Node has multiple PostUpdateNodes." + } + + query predicate postIsInSameCallable(PostUpdateNode n, string msg) { + n.getEnclosingCallable() != n.getPreUpdateNode().getEnclosingCallable() and + msg = "PostUpdateNode does not share callable with its pre-update node." + } + + private predicate hasPost(Node n) { exists(PostUpdateNode post | post.getPreUpdateNode() = n) } + + query predicate reverseRead(Node n, string msg) { + exists(Node n2 | readStep(n, _, n2) and hasPost(n2) and not hasPost(n)) and + msg = "Origin of readStep is missing a PostUpdateNode." + } + + query predicate storeIsPostUpdate(Node n, string msg) { + storeStep(_, _, n) and + not n instanceof PostUpdateNode and + msg = "Store targets should be PostUpdateNodes." + } + + query predicate argHasPostUpdate(ArgumentNode n, string msg) { + not hasPost(n) and + not isImmutableOrUnobservable(n) and + msg = "ArgumentNode is missing PostUpdateNode." + } +} diff --git a/python/ql/src/experimental/dataflow/internal/DataFlowImplSpecific.qll b/python/ql/src/experimental/dataflow/internal/DataFlowImplSpecific.qll new file mode 100644 index 000000000000..8e18162df569 --- /dev/null +++ b/python/ql/src/experimental/dataflow/internal/DataFlowImplSpecific.qll @@ -0,0 +1,12 @@ +/** + * Provides Python-specific definitions for use in the data flow library. + */ +module Private { + import DataFlowPrivate +// import DataFlowDispatch +} + +module Public { + import DataFlowPublic + import DataFlowUtil +} diff --git a/python/ql/src/experimental/dataflow/internal/DataFlowPrivate.qll b/python/ql/src/experimental/dataflow/internal/DataFlowPrivate.qll new file mode 100644 index 000000000000..95c72ef7e144 --- /dev/null +++ b/python/ql/src/experimental/dataflow/internal/DataFlowPrivate.qll @@ -0,0 +1,280 @@ +private import python +private import DataFlowPublic + +//-------- +// Data flow graph +//-------- +//-------- +// Nodes +//-------- +/** + * A node associated with an object after an operation that might have + * changed its state. + * + * This can be either the argument to a callable after the callable returns + * (which might have mutated the argument), or the qualifier of a field after + * an update to the field. + * + * Nodes corresponding to AST elements, for example `ExprNode`, usually refer + * to the value before the update with the exception of `ObjectCreation`, + * which represents the value after the constructor has run. + */ +abstract class PostUpdateNode extends Node { + /** Gets the node before the state update. */ + abstract Node getPreUpdateNode(); +} + +class DataFlowExpr = Expr; + +/** + * Flow between ESSA variables. + * This includes both local and global variables. + * Flow comes from definitions, uses and refinements. + */ +// TODO: Consider constraining `nodeFrom` and `nodeTo` to be in the same scope. +module EssaFlow { + predicate essaFlowStep(Node nodeFrom, Node nodeTo) { + // Definition + // `x = f(42)` + // nodeFrom is `f(42)`, cfg node + // nodeTo is `x`, essa var + nodeFrom.(CfgNode).getNode() = + nodeTo.(EssaNode).getVar().getDefinition().(AssignmentDefinition).getValue() + or + // With definition + // `with f(42) as x:` + // nodeFrom is `f(42)`, cfg node + // nodeTo is `x`, essa var + exists(With with, ControlFlowNode contextManager, ControlFlowNode var | + nodeFrom.(CfgNode).getNode() = contextManager and + nodeTo.(EssaNode).getVar().getDefinition().(WithDefinition).getDefiningNode() = var and + // see `with_flow` in `python/ql/src/semmle/python/dataflow/Implementation.qll` + with.getContextExpr() = contextManager.getNode() and + with.getOptionalVars() = var.getNode() and + contextManager.strictlyDominates(var) + ) + or + // Use + // `y = 42` + // `x = f(y)` + // nodeFrom is `y` on first line, essa var + // nodeTo is `y` on second line, cfg node + nodeFrom.(EssaNode).getVar().getAUse() = nodeTo.(CfgNode).getNode() + or + // Refinements + exists(EssaEdgeRefinement r | + nodeTo.(EssaNode).getVar() = r.getVariable() and + nodeFrom.(EssaNode).getVar() = r.getInput() + ) + or + exists(EssaNodeRefinement r | + nodeTo.(EssaNode).getVar() = r.getVariable() and + nodeFrom.(EssaNode).getVar() = r.getInput() + ) + or + exists(PhiFunction p | + nodeTo.(EssaNode).getVar() = p.getVariable() and + nodeFrom.(EssaNode).getVar() = p.getAnInput() + ) + } +} + +//-------- +// Local flow +//-------- +/** + * This is the local flow predicate that is used as a building block in global + * data flow. It is a strict subset of the `localFlowStep` predicate, as it + * excludes SSA flow through instance fields. + */ +predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) { + not nodeFrom.(EssaNode).getVar() instanceof GlobalSsaVariable and + not nodeTo.(EssaNode).getVar() instanceof GlobalSsaVariable and + EssaFlow::essaFlowStep(nodeFrom, nodeTo) +} + +// TODO: Make modules for these headings +//-------- +// Global flow +//-------- +/** Represents a callable */ +class DataFlowCallable = CallableValue; + +/** Represents a call to a callable */ +class DataFlowCall extends CallNode { + DataFlowCallable callable; + + DataFlowCall() { this = callable.getACall() } + + /** Get the callable to which this call goes. */ + DataFlowCallable getCallable() { result = callable } + + /** Gets the enclosing callable of this call. */ + DataFlowCallable getEnclosingCallable() { result.getScope() = this.getNode().getScope() } +} + +/** A data flow node that represents a call argument. */ +class ArgumentNode extends CfgNode { + ArgumentNode() { exists(DataFlowCall call, int pos | node = call.getArg(pos)) } + + /** Holds if this argument occurs at the given position in the given call. */ + predicate argumentOf(DataFlowCall call, int pos) { node = call.getArg(pos) } + + /** Gets the call in which this node is an argument. */ + final DataFlowCall getCall() { this.argumentOf(result, _) } +} + +/** Gets a viable run-time target for the call `call`. */ +DataFlowCallable viableCallable(DataFlowCall call) { result = call.getCallable() } + +private newtype TReturnKind = TNormalReturnKind() + +/** + * A return kind. A return kind describes how a value can be returned + * from a callable. For Python, this is simply a method return. + */ +class ReturnKind extends TReturnKind { + /** Gets a textual representation of this element. */ + string toString() { result = "return" } +} + +/** A data flow node that represents a value returned by a callable. */ +class ReturnNode extends CfgNode { + Return ret; + + // See `TaintTrackingImplementation::returnFlowStep` + ReturnNode() { node = ret.getValue().getAFlowNode() } + + /** Gets the kind of this return node. */ + ReturnKind getKind() { any() } + + override DataFlowCallable getEnclosingCallable() { + result.getScope().getAStmt() = ret // TODO: check nested function definitions + } +} + +/** A data flow node that represents the output of a call. */ +class OutNode extends CfgNode { + OutNode() { node instanceof CallNode } +} + +/** + * Gets a node that can read the value returned from `call` with return kind + * `kind`. + */ +OutNode getAnOutNode(DataFlowCall call, ReturnKind kind) { + call = result.getNode() and + kind = TNormalReturnKind() +} + +//-------- +// Type pruning +//-------- +newtype TDataFlowType = TAnyFlow() + +class DataFlowType extends TDataFlowType { + /** Gets a textual representation of this element. */ + string toString() { result = "DataFlowType" } +} + +/** A node that performs a type cast. */ +class CastNode extends Node { + CastNode() { none() } +} + +/** + * Holds if `t1` and `t2` are compatible, that is, whether data can flow from + * a node of type `t1` to a node of type `t2`. + */ +pragma[inline] +predicate compatibleTypes(DataFlowType t1, DataFlowType t2) { any() } + +/** + * Gets the type of `node`. + */ +DataFlowType getNodeType(Node node) { result = TAnyFlow() } + +/** Gets a string representation of a type returned by `getErasedRepr`. */ +string ppReprType(DataFlowType t) { none() } + +//-------- +// Extra flow +//-------- +/** + * Holds if `pred` can flow to `succ`, by jumping from one callable to + * another. Additional steps specified by the configuration are *not* + * taken into account. + */ +predicate jumpStep(Node pred, Node succ) { + // As we have ESSA variables for global variables, + // we include ESSA flow steps involving global variables. + ( + pred.(EssaNode).getVar() instanceof GlobalSsaVariable + or + succ.(EssaNode).getVar() instanceof GlobalSsaVariable + ) and + EssaFlow::essaFlowStep(pred, succ) +} + +//-------- +// Field flow +//-------- +/** + * Holds if data can flow from `node1` to `node2` via an assignment to + * content `c`. + */ +predicate storeStep(Node node1, Content c, Node node2) { none() } + +/** + * Holds if data can flow from `node1` to `node2` via a read of content `c`. + */ +predicate readStep(Node node1, Content c, Node node2) { none() } + +/** + * Holds if values stored inside content `c` are cleared at node `n`. For example, + * any value stored inside `f` is cleared at the pre-update node associated with `x` + * in `x.f = newValue`. + */ +cached +predicate clearsContent(Node n, Content c) { none() } + +//-------- +// Fancy context-sensitive guards +//-------- +/** + * Holds if the node `n` is unreachable when the call context is `call`. + */ +predicate isUnreachableInCall(Node n, DataFlowCall call) { none() } + +//-------- +// Virtual dispatch with call context +//-------- +/** + * Gets a viable dispatch target of `call` in the context `ctx`. This is + * restricted to those `call`s for which a context might make a difference. + */ +DataFlowCallable viableImplInCallContext(DataFlowCall call, DataFlowCall ctx) { none() } + +/** + * Holds if the set of viable implementations that can be called by `call` + * might be improved by knowing the call context. This is the case if the qualifier accesses a parameter of + * the enclosing callable `c` (including the implicit `this` parameter). + */ +predicate mayBenefitFromCallContext(DataFlowCall call, DataFlowCallable c) { none() } + +//-------- +// Misc +//-------- +/** + * Holds if `n` does not require a `PostUpdateNode` as it either cannot be + * modified or its modification cannot be observed, for example if it is a + * freshly created object that is not saved in a variable. + * + * This predicate is only used for consistency checks. + */ +predicate isImmutableOrUnobservable(Node n) { none() } + +int accessPathLimit() { result = 5 } + +/** Holds if `n` should be hidden from path explanations. */ +predicate nodeIsHidden(Node n) { none() } diff --git a/python/ql/src/experimental/dataflow/internal/DataFlowPublic.qll b/python/ql/src/experimental/dataflow/internal/DataFlowPublic.qll new file mode 100644 index 000000000000..0f950dd60521 --- /dev/null +++ b/python/ql/src/experimental/dataflow/internal/DataFlowPublic.qll @@ -0,0 +1,143 @@ +/** + * Provides Python-specific definitions for use in the data flow library. + */ + +import python +private import DataFlowPrivate + +/** + * IPA type for data flow nodes. + * + * Flow between SSA variables are computed in `Essa.qll` + * + * Flow from SSA variables to control flow nodes are generally via uses. + * + * Flow from control flow nodes to SSA variables are generally via assignments. + * + * The current implementation of these cross flows can be seen in `EssaTaintTracking`. + */ +newtype TNode = + /** A node corresponding to an SSA variable. */ + TEssaNode(EssaVariable var) or + /** A node corresponding to a control flow node. */ + TCfgNode(ControlFlowNode node) + +/** + * An element, viewed as a node in a data flow graph. Either an SSA variable + * (`EssaNode`) or a control flow node (`CfgNode`). + */ +class Node extends TNode { + /** Gets a textual representation of this element. */ + string toString() { result = "Data flow node" } + + /** Gets the scope of this node. */ + Scope getScope() { none() } + + /** Gets the enclosing callable of this node. */ + DataFlowCallable getEnclosingCallable() { result.getScope() = this.getScope() } + + /** Gets the location of this node */ + Location getLocation() { none() } + + /** + * Holds if this element is at the specified location. + * The location spans column `startcolumn` of line `startline` to + * column `endcolumn` of line `endline` in file `filepath`. + * For more information, see + * [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html). + */ + predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + this.getLocation().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } +} + +class EssaNode extends Node, TEssaNode { + EssaVariable var; + + EssaNode() { this = TEssaNode(var) } + + EssaVariable getVar() { result = var } + + /** Gets a textual representation of this element. */ + override string toString() { result = var.toString() } + + override Scope getScope() { result = var.getScope() } + + override Location getLocation() { result = var.getDefinition().getLocation() } +} + +class CfgNode extends Node, TCfgNode { + ControlFlowNode node; + + CfgNode() { this = TCfgNode(node) } + + ControlFlowNode getNode() { result = node } + + /** Gets a textual representation of this element. */ + override string toString() { result = node.toString() } + + override Scope getScope() { result = node.getScope() } + + override Location getLocation() { result = node.getLocation() } +} + +/** + * An expression, viewed as a node in a data flow graph. + * + * Note that because of control-flow splitting, one `Expr` may correspond + * to multiple `ExprNode`s, just like it may correspond to multiple + * `ControlFlow::Node`s. + */ +class ExprNode extends Node { } + +/** Gets a node corresponding to expression `e`. */ +ExprNode exprNode(DataFlowExpr e) { none() } + +/** + * The value of a parameter at function entry, viewed as a node in a data + * flow graph. + */ +class ParameterNode extends EssaNode { + ParameterNode() { var instanceof ParameterDefinition } + + /** + * Holds if this node is the parameter of callable `c` at the + * (zero-based) index `i`. + */ + predicate isParameterOf(DataFlowCallable c, int i) { + var.(ParameterDefinition).getDefiningNode() = c.getParameter(i) + } + + override DataFlowCallable getEnclosingCallable() { this.isParameterOf(result, _) } +} + +/** + * A guard that validates some expression. + * + * To use this in a configuration, extend the class and provide a + * characteristic predicate precisely specifying the guard, and override + * `checks` to specify what is being validated and in which branch. + * + * It is important that all extending classes in scope are disjoint. + */ +class BarrierGuard extends Expr { + // /** Holds if this guard validates `e` upon evaluating to `v`. */ + // abstract predicate checks(Expr e, AbstractValue v); + /** Gets a node guarded by this guard. */ + final ExprNode getAGuardedNode() { + none() + // exists(Expr e, AbstractValue v | + // this.checks(e, v) and + // this.controlsNode(result.getControlFlowNode(), e, v) + // ) + } +} + +/** + * A reference contained in an object. This is either a field or a property. + */ +class Content extends string { + Content() { this = "Content" } +} diff --git a/python/ql/src/experimental/dataflow/internal/DataFlowUtil.qll b/python/ql/src/experimental/dataflow/internal/DataFlowUtil.qll new file mode 100644 index 000000000000..5d4e5b05d4c5 --- /dev/null +++ b/python/ql/src/experimental/dataflow/internal/DataFlowUtil.qll @@ -0,0 +1,18 @@ +/** + * Contains utility functions for writing data flow queries + */ + +import DataFlowPrivate +import DataFlowPublic + +/** + * Holds if data flows from `nodeFrom` to `nodeTo` in exactly one local + * (intra-procedural) step. + */ +predicate localFlowStep(Node nodeFrom, Node nodeTo) { simpleLocalFlowStep(nodeFrom, nodeTo) } + +/** + * Holds if data flows from `source` to `sink` in zero or more local + * (intra-procedural) steps. + */ +predicate localFlow(Node source, Node sink) { localFlowStep*(source, sink) } diff --git a/python/ql/src/experimental/dataflow/internal/TaintTrackingPrivate.qll b/python/ql/src/experimental/dataflow/internal/TaintTrackingPrivate.qll new file mode 100644 index 000000000000..a7dc3dd77776 --- /dev/null +++ b/python/ql/src/experimental/dataflow/internal/TaintTrackingPrivate.qll @@ -0,0 +1,21 @@ +private import python +private import TaintTrackingPublic +private import experimental.dataflow.DataFlow +private import experimental.dataflow.internal.DataFlowPrivate + +/** + * Holds if `node` should be a barrier in all global taint flow configurations + * but not in local taint. + */ +predicate defaultTaintBarrier(DataFlow::Node node) { none() } + +/** + * Holds if the additional step from `pred` to `succ` should be included in all + * global taint flow configurations. + */ +predicate defaultAdditionalTaintStep(DataFlow::Node pred, DataFlow::Node succ) { + none() + // localAdditionalTaintStep(pred, succ) + // or + // succ = pred.(DataFlow::NonLocalJumpNode).getAJumpSuccessor(false) +} diff --git a/python/ql/src/experimental/dataflow/internal/TaintTrackingPublic.qll b/python/ql/src/experimental/dataflow/internal/TaintTrackingPublic.qll new file mode 100644 index 000000000000..28e254613caa --- /dev/null +++ b/python/ql/src/experimental/dataflow/internal/TaintTrackingPublic.qll @@ -0,0 +1,36 @@ +/** + * Provides classes for performing local (intra-procedural) and + * global (inter-procedural) taint-tracking analyses. + */ + +private import python +private import TaintTrackingPrivate +private import experimental.dataflow.DataFlow + +// /** +// * Holds if taint propagates from `source` to `sink` in zero or more local +// * (intra-procedural) steps. +// */ +// predicate localTaint(DataFlow::Node source, DataFlow::Node sink) { localTaintStep*(source, sink) } + +// // /** +// // * Holds if taint can flow from `e1` to `e2` in zero or more +// // * local (intra-procedural) steps. +// // */ +// // predicate localExprTaint(Expr e1, Expr e2) { +// // localTaint(DataFlow::exprNode(e1), DataFlow::exprNode(e2)) +// // } + +// // /** A member (property or field) that is tainted if its containing object is tainted. */ +// // abstract class TaintedMember extends AssignableMember { } + +// /** +// * Holds if taint propagates from `nodeFrom` to `nodeTo` in exactly one local +// * (intra-procedural) step. +// */ +// predicate localTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { +// // Ordinary data flow +// DataFlow::localFlowStep(nodeFrom, nodeTo) +// or +// localAdditionalTaintStep(nodeFrom, nodeTo) +// } \ No newline at end of file diff --git a/python/ql/src/experimental/dataflow/internal/readme.md b/python/ql/src/experimental/dataflow/internal/readme.md new file mode 100644 index 000000000000..88cb5ad3ed0c --- /dev/null +++ b/python/ql/src/experimental/dataflow/internal/readme.md @@ -0,0 +1,138 @@ +# Using the shared dataflow library + +## File organisation + +The files currently live in `experimental` (whereas the existing implementation lives in `semmle\python\dataflow`). + +In there is found `DataFlow.qll`, `DataFlow2.qll` etc. which refer to `internal\DataFlowImpl`, `internal\DataFlowImpl2` etc. respectively. The `DataFlowImplN`-files are all identical copies to avoid mutual recursion. They start off by including two files `internal\DataFlowImplCommon` and `internal\DataFlowImplSpecific`. The former contains all the language-agnostic definitions, while the latter is where we describe our favorite language. `Sepcific` simply forwards to two other files `internal\DataFlowPrivate.qll` and `internal\DataFlowPublic.qll`. Definitions in the former will be hidden behind a `private` modifier, while those in the latter can be referred to in data flow queries. For instance, the definition of `DataFlow::Node` should likely be in `DataFlowPublic.qll`. + +## Define the dataflow graph + +In order to use the dataflow library, we need to define the dataflow graph, +that is define the nodes and the edges. + +### Define the nodes + +The nodes are defined in the type `DataFlow::Node` (found in `DataFlowPublic.qll`). +This should likely be an IPA type, so we can extend it as needed. + +Typical cases needed to construct the call graph include + - argument node + - parameter node + - return node + +Typical extensions include + - postupdate nodes + - implicit `this`-nodes + +### Define the edges + +The edges split into local flow (within a function) and global flow (the call graph, between functions/procedures). + +Extra flow, such as reading from and writing to global variables, can be captured in `jumpStep`. +The local flow should be obtainalble from an SSA computation. +Local flow nodes are generally either control flow nodes or SSA variables. +Flow from control flow nodes to SSA variables comes from SSA variable definitions, while flow from SSA variables to control flow nodes comes from def-use pairs. + +The global flow should be obtainable from a `PointsTo` analysis. It is specified via `viableCallable` and +`getAnOutNode`. Consider making `ReturnKind` a singleton IPA type as in java. + +Global flow includes local flow within a consistent call context. Thus, for local flow to count as global flow, all relevant nodes should implement `getEnclosingCallable`. + +If complicated dispatch needs to be modelled, try using the `[reduced|pruned]viable*` predicates. + +## Field flow + +To track flow through fields we need to provide a model of fields, that is the `Content` class. + +Field access is specified via `read_step` and `store_step`. + +Work is being done to make field flow handle lists and dictionaries and the like. + +`PostUpdateNode`s become important when field flow is used, as they track modifications to fields resulting from function calls. + +## Type pruning + +If type information is available, flows can be discarded on the grounds of type mismatch. + +Tracked types are given by the class `DataFlowType` and the predicate `getTypeBound`, and compatibility is recorded in the predicate `compatibleTypes`. +If type pruning is not used, `compatibleTypes` should be implemented as `any`; if it is implemented, say, as `none`, all flows will be pruned. + +Further, possible casts are given by the class `CastNode`. + +--- + +# Plan + +## Stage I, data flow + +### Phase 0, setup +Define minimal IPA type for `DataFlow::Node` +Define all required predicates empty (via `none()`), +except `compatibleTypes` which should be `any()`. +Define `ReturnKind`, `DataFlowType`, and `Content` as singleton IPA types. + + +### Phase 1, local flow +Implement `simpleLocalFlowStep` based on the existing SSA computation + +### Phase 2, local flow +Implement `viableCallable` and `getAnOutNode` based on the existing predicate `PointsTo`. + +### Phase 3, field flow +Redefine `Content` and implement `read_step` and `store_step`. + +Review use of post-update nodes. + +### Phase 4, type pruning +Use type trackers to obtain relevant type information and redefine `DataFlowType` to contain appropriate cases. Record the type information in `getTypeBound`. + +Implement `compatibleTypes` (perhaps simply as the identity). + +If necessary, re-implement `getErasedRepr` and `ppReprType`. + +If necessary, redefine `CastNode`. + +### Phase 5, bonus +Review possible use of `[reduced|pruned]viable*` predicates. + +Review need for more elaborate `ReturnKind`. + +Review need for non-empty `jumpStep`. + +Review need for non-empty `isUnreachableInCall`. + +## Stage II, taint tracking + +# Phase 0, setup +Implement all predicates empty. + +# Phase 1, experiments +Try recovering an existing taint tracking query by implementing sources, sinks, sanitizers, and barriers. + +--- + +# Status + +## Achieved + +- Copy of shared library; implemented enough predicates to make it compile. +- Simple flow into, out of, and through functions. +- Some tests, in particular a sceleton for something comprehensive. + +## TODO + +- Implementation has largely been done by finding a plausibly-sounding predicate in the python library to refer to. We should review that we actually have the intended semantics in all places. +- Comprehensive testing. +- The regression tests track the value of guards in order to eliminate impossible data flow. We currently have regressions because of this. We cannot readily replicate the existing method, as it uses the interdefinedness of data flow and taint tracking (there is a boolean taint kind). C++ [does something similar](https://github.com/github/codeql/blob/master/cpp/ql/src/semmle/code/cpp/controlflow/internal/ConstantExprs.qll#L27-L36) for eliminating impossible control flow, which we might be able to replicate (they infer values of "interesting" control flow nodes, which are those needed to determine values of guards). +- Flow for some syntactic constructs are done via extra taint steps in the existing implementation, we should find a way to get data flow for it. Some of this should be covered by field flow. +- A document is being written about proper use of the shared data flow library, this should be adhered to. In particular, we should consider replacing def-use with def-to-first-use and use-to-next-use in local flow. +- We seem to get duplicated results for global flow, as well as flow with and without type (so four times the "unique" results). +- We currently consider control flow nodes like exit nodes for functions, we should probably filter down which ones are of interest. +- We should probably override ToString for a number of data flow nodes. +- Test flow through classes, constructors and methods. +- What happens with named arguments? What does C# do? +- What should the enclosable callable for global variables be? C++ [makes it the variable itself](https://github.com/github/codeql/blob/master/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll#L417), C# seems to not have nodes for these but only for their reads and writes. +- Is `yield` another return type? If not, how is it handled? +- Should `OutNode` include magic function calls? +- Consider creating an internal abstract class for nodes as C# does. Among other things, this can help the optimizer by stating that `getEnclosingCallable` [is functional](https://github.com/github/codeql/blob/master/csharp/ql/src/semmle/code/csharp/dataflow/internal/DataFlowPublic.qll#L62). \ No newline at end of file diff --git a/python/ql/src/experimental/dataflow/internal/tainttracking1/TaintTrackingImpl.qll b/python/ql/src/experimental/dataflow/internal/tainttracking1/TaintTrackingImpl.qll new file mode 100644 index 000000000000..af0d0fec53a6 --- /dev/null +++ b/python/ql/src/experimental/dataflow/internal/tainttracking1/TaintTrackingImpl.qll @@ -0,0 +1,115 @@ +/** + * Provides an implementation of global (interprocedural) taint tracking. + * This file re-exports the local (intraprocedural) taint-tracking analysis + * from `TaintTrackingParameter::Public` and adds a global analysis, mainly + * exposed through the `Configuration` class. For some languages, this file + * exists in several identical copies, allowing queries to use multiple + * `Configuration` classes that depend on each other without introducing + * mutual recursion among those configurations. + */ + +import TaintTrackingParameter::Public +private import TaintTrackingParameter::Private + +/** + * A configuration of interprocedural taint tracking analysis. This defines + * sources, sinks, and any other configurable aspect of the analysis. Each + * use of the taint tracking library must define its own unique extension of + * this abstract class. + * + * A taint-tracking configuration is a special data flow configuration + * (`DataFlow::Configuration`) that allows for flow through nodes that do not + * necessarily preserve values but are still relevant from a taint tracking + * perspective. (For example, string concatenation, where one of the operands + * is tainted.) + * + * To create a configuration, extend this class with a subclass whose + * characteristic predicate is a unique singleton string. For example, write + * + * ```ql + * class MyAnalysisConfiguration extends TaintTracking::Configuration { + * MyAnalysisConfiguration() { this = "MyAnalysisConfiguration" } + * // Override `isSource` and `isSink`. + * // Optionally override `isSanitizer`. + * // Optionally override `isSanitizerIn`. + * // Optionally override `isSanitizerOut`. + * // Optionally override `isSanitizerGuard`. + * // Optionally override `isAdditionalTaintStep`. + * } + * ``` + * + * Then, to query whether there is flow between some `source` and `sink`, + * write + * + * ```ql + * exists(MyAnalysisConfiguration cfg | cfg.hasFlow(source, sink)) + * ``` + * + * Multiple configurations can coexist, but it is unsupported to depend on + * another `TaintTracking::Configuration` or a `DataFlow::Configuration` in the + * overridden predicates that define sources, sinks, or additional steps. + * Instead, the dependency should go to a `TaintTracking2::Configuration` or a + * `DataFlow2::Configuration`, `DataFlow3::Configuration`, etc. + */ +abstract class Configuration extends DataFlow::Configuration { + bindingset[this] + Configuration() { any() } + + /** + * Holds if `source` is a relevant taint source. + * + * The smaller this predicate is, the faster `hasFlow()` will converge. + */ + // overridden to provide taint-tracking specific qldoc + abstract override predicate isSource(DataFlow::Node source); + + /** + * Holds if `sink` is a relevant taint sink. + * + * The smaller this predicate is, the faster `hasFlow()` will converge. + */ + // overridden to provide taint-tracking specific qldoc + abstract override predicate isSink(DataFlow::Node sink); + + /** Holds if the node `node` is a taint sanitizer. */ + predicate isSanitizer(DataFlow::Node node) { none() } + + final override predicate isBarrier(DataFlow::Node node) { + isSanitizer(node) or + defaultTaintBarrier(node) + } + + /** Holds if data flow into `node` is prohibited. */ + predicate isSanitizerIn(DataFlow::Node node) { none() } + + final override predicate isBarrierIn(DataFlow::Node node) { isSanitizerIn(node) } + + /** Holds if data flow out of `node` is prohibited. */ + predicate isSanitizerOut(DataFlow::Node node) { none() } + + final override predicate isBarrierOut(DataFlow::Node node) { isSanitizerOut(node) } + + /** Holds if data flow through nodes guarded by `guard` is prohibited. */ + predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { none() } + + final override predicate isBarrierGuard(DataFlow::BarrierGuard guard) { isSanitizerGuard(guard) } + + /** + * Holds if the additional taint propagation step from `node1` to `node2` + * must be taken into account in the analysis. + */ + predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) { none() } + + final override predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) { + isAdditionalTaintStep(node1, node2) or + defaultAdditionalTaintStep(node1, node2) + } + + /** + * Holds if taint may flow from `source` to `sink` for this configuration. + */ + // overridden to provide taint-tracking specific qldoc + override predicate hasFlow(DataFlow::Node source, DataFlow::Node sink) { + super.hasFlow(source, sink) + } +} diff --git a/python/ql/src/experimental/dataflow/internal/tainttracking1/TaintTrackingParameter.qll b/python/ql/src/experimental/dataflow/internal/tainttracking1/TaintTrackingParameter.qll new file mode 100644 index 000000000000..7f9fb029103c --- /dev/null +++ b/python/ql/src/experimental/dataflow/internal/tainttracking1/TaintTrackingParameter.qll @@ -0,0 +1,6 @@ +import experimental.dataflow.internal.TaintTrackingPublic as Public + +module Private { + import experimental.dataflow.DataFlow::DataFlow as DataFlow + import experimental.dataflow.internal.TaintTrackingPrivate +} \ No newline at end of file diff --git a/python/ql/test/experimental/dataflow/basic/allFlowsConfig.qll b/python/ql/test/experimental/dataflow/basic/allFlowsConfig.qll new file mode 100644 index 000000000000..155fd591c887 --- /dev/null +++ b/python/ql/test/experimental/dataflow/basic/allFlowsConfig.qll @@ -0,0 +1,13 @@ +import experimental.dataflow.DataFlow + +/** + * A configuration to find all flows. + * To be used on tiny programs. + */ +class AllFlowsConfig extends DataFlow::Configuration { + AllFlowsConfig() { this = "AllFlowsConfig" } + + override predicate isSource(DataFlow::Node node) { any() } + + override predicate isSink(DataFlow::Node node) { any() } +} diff --git a/python/ql/test/experimental/dataflow/basic/callGraph.expected b/python/ql/test/experimental/dataflow/basic/callGraph.expected new file mode 100644 index 000000000000..99500138ca07 --- /dev/null +++ b/python/ql/test/experimental/dataflow/basic/callGraph.expected @@ -0,0 +1,3 @@ +| test.py:4:10:4:10 | ControlFlowNode for z | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | +| test.py:7:19:7:19 | ControlFlowNode for a | test.py:1:19:1:19 | SSA variable x | +| test.py:7:19:7:19 | ControlFlowNode for a | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | diff --git a/python/ql/test/experimental/dataflow/basic/callGraph.ql b/python/ql/test/experimental/dataflow/basic/callGraph.ql new file mode 100644 index 000000000000..0d0a02798911 --- /dev/null +++ b/python/ql/test/experimental/dataflow/basic/callGraph.ql @@ -0,0 +1,9 @@ +import callGraphConfig + +from + DataFlow::Node source, + DataFlow::Node sink +where + exists(CallGraphConfig cfg | cfg.hasFlow(source, sink)) +select + source, sink diff --git a/python/ql/test/experimental/dataflow/basic/callGraphConfig.qll b/python/ql/test/experimental/dataflow/basic/callGraphConfig.qll new file mode 100644 index 000000000000..9866ac1cdbe9 --- /dev/null +++ b/python/ql/test/experimental/dataflow/basic/callGraphConfig.qll @@ -0,0 +1,20 @@ +import experimental.dataflow.DataFlow + +/** + * A configuration to find the call graph edges. + */ +class CallGraphConfig extends DataFlow::Configuration { + CallGraphConfig() { this = "CallGraphConfig" } + + override predicate isSource(DataFlow::Node node) { + node instanceof DataFlow::ReturnNode + or + node instanceof DataFlow::ArgumentNode + } + + override predicate isSink(DataFlow::Node node) { + node instanceof DataFlow::OutNode + or + node instanceof DataFlow::ParameterNode + } +} diff --git a/python/ql/test/experimental/dataflow/basic/callGraphSinks.expected b/python/ql/test/experimental/dataflow/basic/callGraphSinks.expected new file mode 100644 index 000000000000..d45952975b92 --- /dev/null +++ b/python/ql/test/experimental/dataflow/basic/callGraphSinks.expected @@ -0,0 +1,2 @@ +| test.py:1:19:1:19 | SSA variable x | +| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | diff --git a/python/ql/test/experimental/dataflow/basic/callGraphSinks.ql b/python/ql/test/experimental/dataflow/basic/callGraphSinks.ql new file mode 100644 index 000000000000..ef635f9afa6b --- /dev/null +++ b/python/ql/test/experimental/dataflow/basic/callGraphSinks.ql @@ -0,0 +1,5 @@ +import callGraphConfig + +from DataFlow::Node sink +where exists(CallGraphConfig cfg | cfg.isSink(sink)) +select sink \ No newline at end of file diff --git a/python/ql/test/experimental/dataflow/basic/callGraphSources.expected b/python/ql/test/experimental/dataflow/basic/callGraphSources.expected new file mode 100644 index 000000000000..4023ba8f3ea1 --- /dev/null +++ b/python/ql/test/experimental/dataflow/basic/callGraphSources.expected @@ -0,0 +1,2 @@ +| test.py:4:10:4:10 | ControlFlowNode for z | +| test.py:7:19:7:19 | ControlFlowNode for a | diff --git a/python/ql/test/experimental/dataflow/basic/callGraphSources.ql b/python/ql/test/experimental/dataflow/basic/callGraphSources.ql new file mode 100644 index 000000000000..de58e5a22696 --- /dev/null +++ b/python/ql/test/experimental/dataflow/basic/callGraphSources.ql @@ -0,0 +1,5 @@ +import callGraphConfig + +from DataFlow::Node source +where exists(CallGraphConfig cfg | cfg.isSource(source)) +select source \ No newline at end of file diff --git a/python/ql/test/experimental/dataflow/basic/global.expected b/python/ql/test/experimental/dataflow/basic/global.expected new file mode 100644 index 000000000000..f42042f7871c --- /dev/null +++ b/python/ql/test/experimental/dataflow/basic/global.expected @@ -0,0 +1,95 @@ +| test.py:0:0:0:0 | GSSA Variable __name__ | test.py:0:0:0:0 | Exit node for Module test | +| test.py:0:0:0:0 | GSSA Variable __name__ | test.py:7:1:7:1 | GSSA Variable b | +| test.py:0:0:0:0 | GSSA Variable __name__ | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | +| test.py:0:0:0:0 | GSSA Variable __package__ | test.py:0:0:0:0 | Exit node for Module test | +| test.py:0:0:0:0 | GSSA Variable __package__ | test.py:7:1:7:1 | GSSA Variable b | +| test.py:0:0:0:0 | GSSA Variable __package__ | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | +| test.py:0:0:0:0 | GSSA Variable b | test.py:0:0:0:0 | Exit node for Module test | +| test.py:0:0:0:0 | GSSA Variable b | test.py:7:1:7:1 | GSSA Variable b | +| test.py:0:0:0:0 | GSSA Variable b | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | +| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:0:0:0:0 | Exit node for Module test | +| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id | +| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:7:1:7:1 | GSSA Variable b | +| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id | +| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | +| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:0:0:0:0 | Exit node for Module test | +| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:7:1:7:1 | GSSA Variable b | +| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id | +| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | +| test.py:1:19:1:19 | SSA variable x | test.py:0:0:0:0 | Exit node for Module test | +| test.py:1:19:1:19 | SSA variable x | test.py:1:1:1:21 | Exit node for Function obfuscated_id | +| test.py:1:19:1:19 | SSA variable x | test.py:2:3:2:3 | SSA variable y | +| test.py:1:19:1:19 | SSA variable x | test.py:2:7:2:7 | ControlFlowNode for x | +| test.py:1:19:1:19 | SSA variable x | test.py:3:3:3:3 | SSA variable z | +| test.py:1:19:1:19 | SSA variable x | test.py:3:7:3:7 | ControlFlowNode for y | +| test.py:1:19:1:19 | SSA variable x | test.py:4:10:4:10 | ControlFlowNode for z | +| test.py:1:19:1:19 | SSA variable x | test.py:7:1:7:1 | GSSA Variable b | +| test.py:1:19:1:19 | SSA variable x | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | +| test.py:2:3:2:3 | SSA variable y | test.py:0:0:0:0 | Exit node for Module test | +| test.py:2:3:2:3 | SSA variable y | test.py:1:1:1:21 | Exit node for Function obfuscated_id | +| test.py:2:3:2:3 | SSA variable y | test.py:3:3:3:3 | SSA variable z | +| test.py:2:3:2:3 | SSA variable y | test.py:3:7:3:7 | ControlFlowNode for y | +| test.py:2:3:2:3 | SSA variable y | test.py:4:10:4:10 | ControlFlowNode for z | +| test.py:2:3:2:3 | SSA variable y | test.py:7:1:7:1 | GSSA Variable b | +| test.py:2:3:2:3 | SSA variable y | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | +| test.py:2:7:2:7 | ControlFlowNode for x | test.py:0:0:0:0 | Exit node for Module test | +| test.py:2:7:2:7 | ControlFlowNode for x | test.py:1:1:1:21 | Exit node for Function obfuscated_id | +| test.py:2:7:2:7 | ControlFlowNode for x | test.py:2:3:2:3 | SSA variable y | +| test.py:2:7:2:7 | ControlFlowNode for x | test.py:3:3:3:3 | SSA variable z | +| test.py:2:7:2:7 | ControlFlowNode for x | test.py:3:7:3:7 | ControlFlowNode for y | +| test.py:2:7:2:7 | ControlFlowNode for x | test.py:4:10:4:10 | ControlFlowNode for z | +| test.py:2:7:2:7 | ControlFlowNode for x | test.py:7:1:7:1 | GSSA Variable b | +| test.py:2:7:2:7 | ControlFlowNode for x | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | +| test.py:3:3:3:3 | SSA variable z | test.py:0:0:0:0 | Exit node for Module test | +| test.py:3:3:3:3 | SSA variable z | test.py:1:1:1:21 | Exit node for Function obfuscated_id | +| test.py:3:3:3:3 | SSA variable z | test.py:4:10:4:10 | ControlFlowNode for z | +| test.py:3:3:3:3 | SSA variable z | test.py:7:1:7:1 | GSSA Variable b | +| test.py:3:3:3:3 | SSA variable z | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | +| test.py:3:7:3:7 | ControlFlowNode for y | test.py:0:0:0:0 | Exit node for Module test | +| test.py:3:7:3:7 | ControlFlowNode for y | test.py:1:1:1:21 | Exit node for Function obfuscated_id | +| test.py:3:7:3:7 | ControlFlowNode for y | test.py:3:3:3:3 | SSA variable z | +| test.py:3:7:3:7 | ControlFlowNode for y | test.py:4:10:4:10 | ControlFlowNode for z | +| test.py:3:7:3:7 | ControlFlowNode for y | test.py:7:1:7:1 | GSSA Variable b | +| test.py:3:7:3:7 | ControlFlowNode for y | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | +| test.py:4:10:4:10 | ControlFlowNode for z | test.py:0:0:0:0 | Exit node for Module test | +| test.py:4:10:4:10 | ControlFlowNode for z | test.py:7:1:7:1 | GSSA Variable b | +| test.py:4:10:4:10 | ControlFlowNode for z | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | +| test.py:6:1:6:1 | GSSA Variable a | test.py:0:0:0:0 | Exit node for Module test | +| test.py:6:1:6:1 | GSSA Variable a | test.py:1:1:1:21 | Exit node for Function obfuscated_id | +| test.py:6:1:6:1 | GSSA Variable a | test.py:1:19:1:19 | SSA variable x | +| test.py:6:1:6:1 | GSSA Variable a | test.py:2:3:2:3 | SSA variable y | +| test.py:6:1:6:1 | GSSA Variable a | test.py:2:7:2:7 | ControlFlowNode for x | +| test.py:6:1:6:1 | GSSA Variable a | test.py:3:3:3:3 | SSA variable z | +| test.py:6:1:6:1 | GSSA Variable a | test.py:3:7:3:7 | ControlFlowNode for y | +| test.py:6:1:6:1 | GSSA Variable a | test.py:4:10:4:10 | ControlFlowNode for z | +| test.py:6:1:6:1 | GSSA Variable a | test.py:7:1:7:1 | GSSA Variable b | +| test.py:6:1:6:1 | GSSA Variable a | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | +| test.py:6:1:6:1 | GSSA Variable a | test.py:7:5:7:20 | GSSA Variable a | +| test.py:6:1:6:1 | GSSA Variable a | test.py:7:19:7:19 | ControlFlowNode for a | +| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:0:0:0:0 | Exit node for Module test | +| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:1:1:1:21 | Exit node for Function obfuscated_id | +| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:1:19:1:19 | SSA variable x | +| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:2:3:2:3 | SSA variable y | +| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:2:7:2:7 | ControlFlowNode for x | +| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:3:3:3:3 | SSA variable z | +| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:3:7:3:7 | ControlFlowNode for y | +| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:4:10:4:10 | ControlFlowNode for z | +| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:6:1:6:1 | GSSA Variable a | +| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:7:1:7:1 | GSSA Variable b | +| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | +| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:7:5:7:20 | GSSA Variable a | +| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:7:19:7:19 | ControlFlowNode for a | +| test.py:7:1:7:1 | GSSA Variable b | test.py:0:0:0:0 | Exit node for Module test | +| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | test.py:0:0:0:0 | Exit node for Module test | +| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | test.py:7:1:7:1 | GSSA Variable b | +| test.py:7:5:7:20 | GSSA Variable a | test.py:0:0:0:0 | Exit node for Module test | +| test.py:7:19:7:19 | ControlFlowNode for a | test.py:0:0:0:0 | Exit node for Module test | +| test.py:7:19:7:19 | ControlFlowNode for a | test.py:1:1:1:21 | Exit node for Function obfuscated_id | +| test.py:7:19:7:19 | ControlFlowNode for a | test.py:1:19:1:19 | SSA variable x | +| test.py:7:19:7:19 | ControlFlowNode for a | test.py:2:3:2:3 | SSA variable y | +| test.py:7:19:7:19 | ControlFlowNode for a | test.py:2:7:2:7 | ControlFlowNode for x | +| test.py:7:19:7:19 | ControlFlowNode for a | test.py:3:3:3:3 | SSA variable z | +| test.py:7:19:7:19 | ControlFlowNode for a | test.py:3:7:3:7 | ControlFlowNode for y | +| test.py:7:19:7:19 | ControlFlowNode for a | test.py:4:10:4:10 | ControlFlowNode for z | +| test.py:7:19:7:19 | ControlFlowNode for a | test.py:7:1:7:1 | GSSA Variable b | +| test.py:7:19:7:19 | ControlFlowNode for a | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | diff --git a/python/ql/test/experimental/dataflow/basic/global.ql b/python/ql/test/experimental/dataflow/basic/global.ql new file mode 100644 index 000000000000..6f57575819a7 --- /dev/null +++ b/python/ql/test/experimental/dataflow/basic/global.ql @@ -0,0 +1,10 @@ +import allFlowsConfig + +from + DataFlow::Node source, + DataFlow::Node sink +where + source != sink and + exists(AllFlowsConfig cfg | cfg.hasFlow(source, sink)) +select + source, sink diff --git a/python/ql/test/experimental/dataflow/basic/globalStep.expected b/python/ql/test/experimental/dataflow/basic/globalStep.expected new file mode 100644 index 000000000000..9cf3a8c4d898 --- /dev/null +++ b/python/ql/test/experimental/dataflow/basic/globalStep.expected @@ -0,0 +1,118 @@ +| test.py:0:0:0:0 | GSSA Variable __name__ | test.py:0:0:0:0 | Exit node for Module test | +| test.py:0:0:0:0 | GSSA Variable __name__ | test.py:0:0:0:0 | Exit node for Module test | +| test.py:0:0:0:0 | GSSA Variable __name__ | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | +| test.py:0:0:0:0 | GSSA Variable __name__ | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | +| test.py:0:0:0:0 | GSSA Variable __package__ | test.py:0:0:0:0 | Exit node for Module test | +| test.py:0:0:0:0 | GSSA Variable __package__ | test.py:0:0:0:0 | Exit node for Module test | +| test.py:0:0:0:0 | GSSA Variable __package__ | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | +| test.py:0:0:0:0 | GSSA Variable __package__ | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | +| test.py:0:0:0:0 | GSSA Variable b | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | +| test.py:0:0:0:0 | GSSA Variable b | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | +| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id | +| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id | +| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:0:0:0:0 | Exit node for Module test | +| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:0:0:0:0 | Exit node for Module test | +| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id | +| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id | +| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | +| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | +| test.py:1:19:1:19 | SSA variable x | test.py:1:1:1:21 | Exit node for Function obfuscated_id | +| test.py:1:19:1:19 | SSA variable x | test.py:1:1:1:21 | Exit node for Function obfuscated_id | +| test.py:1:19:1:19 | SSA variable x | test.py:1:1:1:21 | Exit node for Function obfuscated_id | +| test.py:1:19:1:19 | SSA variable x | test.py:1:1:1:21 | Exit node for Function obfuscated_id | +| test.py:1:19:1:19 | SSA variable x | test.py:2:3:2:3 | SSA variable y | +| test.py:1:19:1:19 | SSA variable x | test.py:2:3:2:3 | SSA variable y | +| test.py:1:19:1:19 | SSA variable x | test.py:2:3:2:3 | SSA variable y | +| test.py:1:19:1:19 | SSA variable x | test.py:2:3:2:3 | SSA variable y | +| test.py:1:19:1:19 | SSA variable x | test.py:2:7:2:7 | ControlFlowNode for x | +| test.py:1:19:1:19 | SSA variable x | test.py:2:7:2:7 | ControlFlowNode for x | +| test.py:1:19:1:19 | SSA variable x | test.py:2:7:2:7 | ControlFlowNode for x | +| test.py:1:19:1:19 | SSA variable x | test.py:2:7:2:7 | ControlFlowNode for x | +| test.py:1:19:1:19 | SSA variable x | test.py:3:3:3:3 | SSA variable z | +| test.py:1:19:1:19 | SSA variable x | test.py:3:3:3:3 | SSA variable z | +| test.py:1:19:1:19 | SSA variable x | test.py:3:3:3:3 | SSA variable z | +| test.py:1:19:1:19 | SSA variable x | test.py:3:3:3:3 | SSA variable z | +| test.py:1:19:1:19 | SSA variable x | test.py:3:7:3:7 | ControlFlowNode for y | +| test.py:1:19:1:19 | SSA variable x | test.py:3:7:3:7 | ControlFlowNode for y | +| test.py:1:19:1:19 | SSA variable x | test.py:3:7:3:7 | ControlFlowNode for y | +| test.py:1:19:1:19 | SSA variable x | test.py:3:7:3:7 | ControlFlowNode for y | +| test.py:1:19:1:19 | SSA variable x | test.py:4:10:4:10 | ControlFlowNode for z | +| test.py:1:19:1:19 | SSA variable x | test.py:4:10:4:10 | ControlFlowNode for z | +| test.py:1:19:1:19 | SSA variable x | test.py:4:10:4:10 | ControlFlowNode for z | +| test.py:1:19:1:19 | SSA variable x | test.py:4:10:4:10 | ControlFlowNode for z | +| test.py:2:3:2:3 | SSA variable y | test.py:1:1:1:21 | Exit node for Function obfuscated_id | +| test.py:2:3:2:3 | SSA variable y | test.py:1:1:1:21 | Exit node for Function obfuscated_id | +| test.py:2:3:2:3 | SSA variable y | test.py:1:1:1:21 | Exit node for Function obfuscated_id | +| test.py:2:3:2:3 | SSA variable y | test.py:1:1:1:21 | Exit node for Function obfuscated_id | +| test.py:2:3:2:3 | SSA variable y | test.py:3:3:3:3 | SSA variable z | +| test.py:2:3:2:3 | SSA variable y | test.py:3:3:3:3 | SSA variable z | +| test.py:2:3:2:3 | SSA variable y | test.py:3:3:3:3 | SSA variable z | +| test.py:2:3:2:3 | SSA variable y | test.py:3:3:3:3 | SSA variable z | +| test.py:2:3:2:3 | SSA variable y | test.py:3:7:3:7 | ControlFlowNode for y | +| test.py:2:3:2:3 | SSA variable y | test.py:3:7:3:7 | ControlFlowNode for y | +| test.py:2:3:2:3 | SSA variable y | test.py:3:7:3:7 | ControlFlowNode for y | +| test.py:2:3:2:3 | SSA variable y | test.py:3:7:3:7 | ControlFlowNode for y | +| test.py:2:3:2:3 | SSA variable y | test.py:4:10:4:10 | ControlFlowNode for z | +| test.py:2:3:2:3 | SSA variable y | test.py:4:10:4:10 | ControlFlowNode for z | +| test.py:2:3:2:3 | SSA variable y | test.py:4:10:4:10 | ControlFlowNode for z | +| test.py:2:3:2:3 | SSA variable y | test.py:4:10:4:10 | ControlFlowNode for z | +| test.py:2:7:2:7 | ControlFlowNode for x | test.py:1:1:1:21 | Exit node for Function obfuscated_id | +| test.py:2:7:2:7 | ControlFlowNode for x | test.py:1:1:1:21 | Exit node for Function obfuscated_id | +| test.py:2:7:2:7 | ControlFlowNode for x | test.py:1:1:1:21 | Exit node for Function obfuscated_id | +| test.py:2:7:2:7 | ControlFlowNode for x | test.py:1:1:1:21 | Exit node for Function obfuscated_id | +| test.py:2:7:2:7 | ControlFlowNode for x | test.py:2:3:2:3 | SSA variable y | +| test.py:2:7:2:7 | ControlFlowNode for x | test.py:2:3:2:3 | SSA variable y | +| test.py:2:7:2:7 | ControlFlowNode for x | test.py:2:3:2:3 | SSA variable y | +| test.py:2:7:2:7 | ControlFlowNode for x | test.py:2:3:2:3 | SSA variable y | +| test.py:2:7:2:7 | ControlFlowNode for x | test.py:3:3:3:3 | SSA variable z | +| test.py:2:7:2:7 | ControlFlowNode for x | test.py:3:3:3:3 | SSA variable z | +| test.py:2:7:2:7 | ControlFlowNode for x | test.py:3:3:3:3 | SSA variable z | +| test.py:2:7:2:7 | ControlFlowNode for x | test.py:3:3:3:3 | SSA variable z | +| test.py:2:7:2:7 | ControlFlowNode for x | test.py:3:7:3:7 | ControlFlowNode for y | +| test.py:2:7:2:7 | ControlFlowNode for x | test.py:3:7:3:7 | ControlFlowNode for y | +| test.py:2:7:2:7 | ControlFlowNode for x | test.py:3:7:3:7 | ControlFlowNode for y | +| test.py:2:7:2:7 | ControlFlowNode for x | test.py:3:7:3:7 | ControlFlowNode for y | +| test.py:2:7:2:7 | ControlFlowNode for x | test.py:4:10:4:10 | ControlFlowNode for z | +| test.py:2:7:2:7 | ControlFlowNode for x | test.py:4:10:4:10 | ControlFlowNode for z | +| test.py:2:7:2:7 | ControlFlowNode for x | test.py:4:10:4:10 | ControlFlowNode for z | +| test.py:2:7:2:7 | ControlFlowNode for x | test.py:4:10:4:10 | ControlFlowNode for z | +| test.py:3:3:3:3 | SSA variable z | test.py:1:1:1:21 | Exit node for Function obfuscated_id | +| test.py:3:3:3:3 | SSA variable z | test.py:1:1:1:21 | Exit node for Function obfuscated_id | +| test.py:3:3:3:3 | SSA variable z | test.py:1:1:1:21 | Exit node for Function obfuscated_id | +| test.py:3:3:3:3 | SSA variable z | test.py:1:1:1:21 | Exit node for Function obfuscated_id | +| test.py:3:3:3:3 | SSA variable z | test.py:4:10:4:10 | ControlFlowNode for z | +| test.py:3:3:3:3 | SSA variable z | test.py:4:10:4:10 | ControlFlowNode for z | +| test.py:3:3:3:3 | SSA variable z | test.py:4:10:4:10 | ControlFlowNode for z | +| test.py:3:3:3:3 | SSA variable z | test.py:4:10:4:10 | ControlFlowNode for z | +| test.py:3:7:3:7 | ControlFlowNode for y | test.py:1:1:1:21 | Exit node for Function obfuscated_id | +| test.py:3:7:3:7 | ControlFlowNode for y | test.py:1:1:1:21 | Exit node for Function obfuscated_id | +| test.py:3:7:3:7 | ControlFlowNode for y | test.py:1:1:1:21 | Exit node for Function obfuscated_id | +| test.py:3:7:3:7 | ControlFlowNode for y | test.py:1:1:1:21 | Exit node for Function obfuscated_id | +| test.py:3:7:3:7 | ControlFlowNode for y | test.py:3:3:3:3 | SSA variable z | +| test.py:3:7:3:7 | ControlFlowNode for y | test.py:3:3:3:3 | SSA variable z | +| test.py:3:7:3:7 | ControlFlowNode for y | test.py:3:3:3:3 | SSA variable z | +| test.py:3:7:3:7 | ControlFlowNode for y | test.py:3:3:3:3 | SSA variable z | +| test.py:3:7:3:7 | ControlFlowNode for y | test.py:4:10:4:10 | ControlFlowNode for z | +| test.py:3:7:3:7 | ControlFlowNode for y | test.py:4:10:4:10 | ControlFlowNode for z | +| test.py:3:7:3:7 | ControlFlowNode for y | test.py:4:10:4:10 | ControlFlowNode for z | +| test.py:3:7:3:7 | ControlFlowNode for y | test.py:4:10:4:10 | ControlFlowNode for z | +| test.py:4:10:4:10 | ControlFlowNode for z | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | +| test.py:4:10:4:10 | ControlFlowNode for z | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | +| test.py:6:1:6:1 | GSSA Variable a | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | +| test.py:6:1:6:1 | GSSA Variable a | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | +| test.py:6:1:6:1 | GSSA Variable a | test.py:7:5:7:20 | GSSA Variable a | +| test.py:6:1:6:1 | GSSA Variable a | test.py:7:5:7:20 | GSSA Variable a | +| test.py:6:1:6:1 | GSSA Variable a | test.py:7:19:7:19 | ControlFlowNode for a | +| test.py:6:1:6:1 | GSSA Variable a | test.py:7:19:7:19 | ControlFlowNode for a | +| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:6:1:6:1 | GSSA Variable a | +| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:6:1:6:1 | GSSA Variable a | +| test.py:7:1:7:1 | GSSA Variable b | test.py:0:0:0:0 | Exit node for Module test | +| test.py:7:1:7:1 | GSSA Variable b | test.py:0:0:0:0 | Exit node for Module test | +| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | test.py:7:1:7:1 | GSSA Variable b | +| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | test.py:7:1:7:1 | GSSA Variable b | +| test.py:7:5:7:20 | GSSA Variable a | test.py:0:0:0:0 | Exit node for Module test | +| test.py:7:5:7:20 | GSSA Variable a | test.py:0:0:0:0 | Exit node for Module test | +| test.py:7:19:7:19 | ControlFlowNode for a | test.py:1:19:1:19 | SSA variable x | +| test.py:7:19:7:19 | ControlFlowNode for a | test.py:1:19:1:19 | SSA variable x | +| test.py:7:19:7:19 | ControlFlowNode for a | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | +| test.py:7:19:7:19 | ControlFlowNode for a | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | diff --git a/python/ql/test/experimental/dataflow/basic/globalStep.ql b/python/ql/test/experimental/dataflow/basic/globalStep.ql new file mode 100644 index 000000000000..f583a9f1a817 --- /dev/null +++ b/python/ql/test/experimental/dataflow/basic/globalStep.ql @@ -0,0 +1,9 @@ +import allFlowsConfig + +from + DataFlow::PathNode fromNode, + DataFlow::PathNode toNode +where + toNode = fromNode.getASuccessor() +select + fromNode, toNode diff --git a/python/ql/test/experimental/dataflow/basic/local.expected b/python/ql/test/experimental/dataflow/basic/local.expected new file mode 100644 index 000000000000..052c52f90fc6 --- /dev/null +++ b/python/ql/test/experimental/dataflow/basic/local.expected @@ -0,0 +1,51 @@ +| test.py:0:0:0:0 | Entry node for Module test | test.py:0:0:0:0 | Entry node for Module test | +| test.py:0:0:0:0 | Exit node for Module test | test.py:0:0:0:0 | Exit node for Module test | +| test.py:0:0:0:0 | GSSA Variable __name__ | test.py:0:0:0:0 | GSSA Variable __name__ | +| test.py:0:0:0:0 | GSSA Variable __package__ | test.py:0:0:0:0 | GSSA Variable __package__ | +| test.py:0:0:0:0 | GSSA Variable b | test.py:0:0:0:0 | GSSA Variable b | +| test.py:0:0:0:0 | SSA variable $ | test.py:0:0:0:0 | Exit node for Module test | +| test.py:0:0:0:0 | SSA variable $ | test.py:0:0:0:0 | SSA variable $ | +| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | +| test.py:1:1:1:21 | Entry node for Function obfuscated_id | test.py:1:1:1:21 | Entry node for Function obfuscated_id | +| test.py:1:1:1:21 | Exit node for Function obfuscated_id | test.py:1:1:1:21 | Exit node for Function obfuscated_id | +| test.py:1:5:1:17 | ControlFlowNode for obfuscated_id | test.py:1:5:1:17 | ControlFlowNode for obfuscated_id | +| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:1:5:1:17 | GSSA Variable obfuscated_id | +| test.py:1:19:1:19 | ControlFlowNode for x | test.py:1:19:1:19 | ControlFlowNode for x | +| test.py:1:19:1:19 | SSA variable x | test.py:1:1:1:21 | Exit node for Function obfuscated_id | +| test.py:1:19:1:19 | SSA variable x | test.py:1:19:1:19 | SSA variable x | +| test.py:1:19:1:19 | SSA variable x | test.py:2:3:2:3 | SSA variable y | +| test.py:1:19:1:19 | SSA variable x | test.py:2:7:2:7 | ControlFlowNode for x | +| test.py:1:19:1:19 | SSA variable x | test.py:3:3:3:3 | SSA variable z | +| test.py:1:19:1:19 | SSA variable x | test.py:3:7:3:7 | ControlFlowNode for y | +| test.py:1:19:1:19 | SSA variable x | test.py:4:10:4:10 | ControlFlowNode for z | +| test.py:2:3:2:3 | ControlFlowNode for y | test.py:2:3:2:3 | ControlFlowNode for y | +| test.py:2:3:2:3 | SSA variable y | test.py:1:1:1:21 | Exit node for Function obfuscated_id | +| test.py:2:3:2:3 | SSA variable y | test.py:2:3:2:3 | SSA variable y | +| test.py:2:3:2:3 | SSA variable y | test.py:3:3:3:3 | SSA variable z | +| test.py:2:3:2:3 | SSA variable y | test.py:3:7:3:7 | ControlFlowNode for y | +| test.py:2:3:2:3 | SSA variable y | test.py:4:10:4:10 | ControlFlowNode for z | +| test.py:2:7:2:7 | ControlFlowNode for x | test.py:1:1:1:21 | Exit node for Function obfuscated_id | +| test.py:2:7:2:7 | ControlFlowNode for x | test.py:2:3:2:3 | SSA variable y | +| test.py:2:7:2:7 | ControlFlowNode for x | test.py:2:7:2:7 | ControlFlowNode for x | +| test.py:2:7:2:7 | ControlFlowNode for x | test.py:3:3:3:3 | SSA variable z | +| test.py:2:7:2:7 | ControlFlowNode for x | test.py:3:7:3:7 | ControlFlowNode for y | +| test.py:2:7:2:7 | ControlFlowNode for x | test.py:4:10:4:10 | ControlFlowNode for z | +| test.py:3:3:3:3 | ControlFlowNode for z | test.py:3:3:3:3 | ControlFlowNode for z | +| test.py:3:3:3:3 | SSA variable z | test.py:1:1:1:21 | Exit node for Function obfuscated_id | +| test.py:3:3:3:3 | SSA variable z | test.py:3:3:3:3 | SSA variable z | +| test.py:3:3:3:3 | SSA variable z | test.py:4:10:4:10 | ControlFlowNode for z | +| test.py:3:7:3:7 | ControlFlowNode for y | test.py:1:1:1:21 | Exit node for Function obfuscated_id | +| test.py:3:7:3:7 | ControlFlowNode for y | test.py:3:3:3:3 | SSA variable z | +| test.py:3:7:3:7 | ControlFlowNode for y | test.py:3:7:3:7 | ControlFlowNode for y | +| test.py:3:7:3:7 | ControlFlowNode for y | test.py:4:10:4:10 | ControlFlowNode for z | +| test.py:4:3:4:10 | ControlFlowNode for Return | test.py:4:3:4:10 | ControlFlowNode for Return | +| test.py:4:10:4:10 | ControlFlowNode for z | test.py:4:10:4:10 | ControlFlowNode for z | +| test.py:6:1:6:1 | ControlFlowNode for a | test.py:6:1:6:1 | ControlFlowNode for a | +| test.py:6:1:6:1 | GSSA Variable a | test.py:6:1:6:1 | GSSA Variable a | +| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | +| test.py:7:1:7:1 | ControlFlowNode for b | test.py:7:1:7:1 | ControlFlowNode for b | +| test.py:7:1:7:1 | GSSA Variable b | test.py:7:1:7:1 | GSSA Variable b | +| test.py:7:5:7:17 | ControlFlowNode for obfuscated_id | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id | +| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | +| test.py:7:5:7:20 | GSSA Variable a | test.py:7:5:7:20 | GSSA Variable a | +| test.py:7:19:7:19 | ControlFlowNode for a | test.py:7:19:7:19 | ControlFlowNode for a | diff --git a/python/ql/test/experimental/dataflow/basic/local.ql b/python/ql/test/experimental/dataflow/basic/local.ql new file mode 100644 index 000000000000..40aa5c403e19 --- /dev/null +++ b/python/ql/test/experimental/dataflow/basic/local.ql @@ -0,0 +1,9 @@ +import experimental.dataflow.DataFlow + +from + DataFlow::Node fromNode, + DataFlow::Node toNode +where + DataFlow::localFlow(fromNode, toNode) +select + fromNode, toNode diff --git a/python/ql/test/experimental/dataflow/basic/localStep.expected b/python/ql/test/experimental/dataflow/basic/localStep.expected new file mode 100644 index 000000000000..24509950e885 --- /dev/null +++ b/python/ql/test/experimental/dataflow/basic/localStep.expected @@ -0,0 +1,9 @@ +| test.py:0:0:0:0 | SSA variable $ | test.py:0:0:0:0 | Exit node for Module test | +| test.py:1:19:1:19 | SSA variable x | test.py:1:1:1:21 | Exit node for Function obfuscated_id | +| test.py:1:19:1:19 | SSA variable x | test.py:2:7:2:7 | ControlFlowNode for x | +| test.py:2:3:2:3 | SSA variable y | test.py:1:1:1:21 | Exit node for Function obfuscated_id | +| test.py:2:3:2:3 | SSA variable y | test.py:3:7:3:7 | ControlFlowNode for y | +| test.py:2:7:2:7 | ControlFlowNode for x | test.py:2:3:2:3 | SSA variable y | +| test.py:3:3:3:3 | SSA variable z | test.py:1:1:1:21 | Exit node for Function obfuscated_id | +| test.py:3:3:3:3 | SSA variable z | test.py:4:10:4:10 | ControlFlowNode for z | +| test.py:3:7:3:7 | ControlFlowNode for y | test.py:3:3:3:3 | SSA variable z | diff --git a/python/ql/test/experimental/dataflow/basic/localStep.ql b/python/ql/test/experimental/dataflow/basic/localStep.ql new file mode 100644 index 000000000000..b8a9941b99bf --- /dev/null +++ b/python/ql/test/experimental/dataflow/basic/localStep.ql @@ -0,0 +1,9 @@ +import experimental.dataflow.DataFlow + +from + DataFlow::Node fromNode, + DataFlow::Node toNode +where + DataFlow::localFlowStep(fromNode, toNode) +select + fromNode, toNode diff --git a/python/ql/test/experimental/dataflow/basic/maximalFlows.expected b/python/ql/test/experimental/dataflow/basic/maximalFlows.expected new file mode 100644 index 000000000000..ef3d9e3bfbc3 --- /dev/null +++ b/python/ql/test/experimental/dataflow/basic/maximalFlows.expected @@ -0,0 +1,13 @@ +| test.py:0:0:0:0 | GSSA Variable __name__ | test.py:7:1:7:1 | GSSA Variable b | +| test.py:0:0:0:0 | GSSA Variable __package__ | test.py:7:1:7:1 | GSSA Variable b | +| test.py:0:0:0:0 | GSSA Variable b | test.py:7:1:7:1 | GSSA Variable b | +| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:7:1:7:1 | GSSA Variable b | +| test.py:1:19:1:19 | SSA variable x | test.py:4:10:4:10 | ControlFlowNode for z | +| test.py:1:19:1:19 | SSA variable x | test.py:7:1:7:1 | GSSA Variable b | +| test.py:2:3:2:3 | SSA variable y | test.py:4:10:4:10 | ControlFlowNode for z | +| test.py:2:3:2:3 | SSA variable y | test.py:7:1:7:1 | GSSA Variable b | +| test.py:3:3:3:3 | SSA variable z | test.py:4:10:4:10 | ControlFlowNode for z | +| test.py:3:3:3:3 | SSA variable z | test.py:7:1:7:1 | GSSA Variable b | +| test.py:6:1:6:1 | GSSA Variable a | test.py:4:10:4:10 | ControlFlowNode for z | +| test.py:6:1:6:1 | GSSA Variable a | test.py:7:1:7:1 | GSSA Variable b | +| test.py:6:1:6:1 | GSSA Variable a | test.py:7:5:7:20 | GSSA Variable a | diff --git a/python/ql/test/experimental/dataflow/basic/maximalFlows.ql b/python/ql/test/experimental/dataflow/basic/maximalFlows.ql new file mode 100644 index 000000000000..6e183dc393bc --- /dev/null +++ b/python/ql/test/experimental/dataflow/basic/maximalFlows.ql @@ -0,0 +1,10 @@ +import maximalFlowsConfig + +from + DataFlow::Node source, + DataFlow::Node sink +where + source != sink and + exists(MaximalFlowsConfig cfg | cfg.hasFlow(source, sink)) +select + source, sink diff --git a/python/ql/test/experimental/dataflow/basic/maximalFlowsConfig.qll b/python/ql/test/experimental/dataflow/basic/maximalFlowsConfig.qll new file mode 100644 index 000000000000..9db8ec67e94d --- /dev/null +++ b/python/ql/test/experimental/dataflow/basic/maximalFlowsConfig.qll @@ -0,0 +1,25 @@ +import experimental.dataflow.DataFlow + +/** + * A configuration to find all "maximal" flows. + * To be used on small programs. + */ +class MaximalFlowsConfig extends DataFlow::Configuration { + MaximalFlowsConfig() { this = "AllFlowsConfig" } + + override predicate isSource(DataFlow::Node node) { + node instanceof DataFlow::ParameterNode + or + node instanceof DataFlow::EssaNode and + not exists(DataFlow::EssaNode pred | + DataFlow::localFlowStep(pred, node) + ) + } + + override predicate isSink(DataFlow::Node node) { + node instanceof DataFlow::ReturnNode + or + node instanceof DataFlow::EssaNode and + not exists(node.(DataFlow::EssaNode).getVar().getASourceUse()) + } +} diff --git a/python/ql/test/experimental/dataflow/basic/sinks.expected b/python/ql/test/experimental/dataflow/basic/sinks.expected new file mode 100644 index 000000000000..bcf55bee27c0 --- /dev/null +++ b/python/ql/test/experimental/dataflow/basic/sinks.expected @@ -0,0 +1,30 @@ +| test.py:0:0:0:0 | Entry node for Module test | +| test.py:0:0:0:0 | Exit node for Module test | +| test.py:0:0:0:0 | GSSA Variable __name__ | +| test.py:0:0:0:0 | GSSA Variable __package__ | +| test.py:0:0:0:0 | GSSA Variable b | +| test.py:0:0:0:0 | SSA variable $ | +| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | +| test.py:1:1:1:21 | Entry node for Function obfuscated_id | +| test.py:1:1:1:21 | Exit node for Function obfuscated_id | +| test.py:1:5:1:17 | ControlFlowNode for obfuscated_id | +| test.py:1:5:1:17 | GSSA Variable obfuscated_id | +| test.py:1:19:1:19 | ControlFlowNode for x | +| test.py:1:19:1:19 | SSA variable x | +| test.py:2:3:2:3 | ControlFlowNode for y | +| test.py:2:3:2:3 | SSA variable y | +| test.py:2:7:2:7 | ControlFlowNode for x | +| test.py:3:3:3:3 | ControlFlowNode for z | +| test.py:3:3:3:3 | SSA variable z | +| test.py:3:7:3:7 | ControlFlowNode for y | +| test.py:4:3:4:10 | ControlFlowNode for Return | +| test.py:4:10:4:10 | ControlFlowNode for z | +| test.py:6:1:6:1 | ControlFlowNode for a | +| test.py:6:1:6:1 | GSSA Variable a | +| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | +| test.py:7:1:7:1 | ControlFlowNode for b | +| test.py:7:1:7:1 | GSSA Variable b | +| test.py:7:5:7:17 | ControlFlowNode for obfuscated_id | +| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | +| test.py:7:5:7:20 | GSSA Variable a | +| test.py:7:19:7:19 | ControlFlowNode for a | diff --git a/python/ql/test/experimental/dataflow/basic/sinks.ql b/python/ql/test/experimental/dataflow/basic/sinks.ql new file mode 100644 index 000000000000..9b4534b98708 --- /dev/null +++ b/python/ql/test/experimental/dataflow/basic/sinks.ql @@ -0,0 +1,5 @@ +import allFlowsConfig + +from DataFlow::Node sink +where exists(AllFlowsConfig cfg | cfg.isSink(sink)) +select sink \ No newline at end of file diff --git a/python/ql/test/experimental/dataflow/basic/sources.expected b/python/ql/test/experimental/dataflow/basic/sources.expected new file mode 100644 index 000000000000..bcf55bee27c0 --- /dev/null +++ b/python/ql/test/experimental/dataflow/basic/sources.expected @@ -0,0 +1,30 @@ +| test.py:0:0:0:0 | Entry node for Module test | +| test.py:0:0:0:0 | Exit node for Module test | +| test.py:0:0:0:0 | GSSA Variable __name__ | +| test.py:0:0:0:0 | GSSA Variable __package__ | +| test.py:0:0:0:0 | GSSA Variable b | +| test.py:0:0:0:0 | SSA variable $ | +| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | +| test.py:1:1:1:21 | Entry node for Function obfuscated_id | +| test.py:1:1:1:21 | Exit node for Function obfuscated_id | +| test.py:1:5:1:17 | ControlFlowNode for obfuscated_id | +| test.py:1:5:1:17 | GSSA Variable obfuscated_id | +| test.py:1:19:1:19 | ControlFlowNode for x | +| test.py:1:19:1:19 | SSA variable x | +| test.py:2:3:2:3 | ControlFlowNode for y | +| test.py:2:3:2:3 | SSA variable y | +| test.py:2:7:2:7 | ControlFlowNode for x | +| test.py:3:3:3:3 | ControlFlowNode for z | +| test.py:3:3:3:3 | SSA variable z | +| test.py:3:7:3:7 | ControlFlowNode for y | +| test.py:4:3:4:10 | ControlFlowNode for Return | +| test.py:4:10:4:10 | ControlFlowNode for z | +| test.py:6:1:6:1 | ControlFlowNode for a | +| test.py:6:1:6:1 | GSSA Variable a | +| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | +| test.py:7:1:7:1 | ControlFlowNode for b | +| test.py:7:1:7:1 | GSSA Variable b | +| test.py:7:5:7:17 | ControlFlowNode for obfuscated_id | +| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | +| test.py:7:5:7:20 | GSSA Variable a | +| test.py:7:19:7:19 | ControlFlowNode for a | diff --git a/python/ql/test/experimental/dataflow/basic/sources.ql b/python/ql/test/experimental/dataflow/basic/sources.ql new file mode 100644 index 000000000000..f47fa31d62ea --- /dev/null +++ b/python/ql/test/experimental/dataflow/basic/sources.ql @@ -0,0 +1,5 @@ +import allFlowsConfig + +from DataFlow::Node source +where exists(AllFlowsConfig cfg | cfg.isSource(source)) +select source \ No newline at end of file diff --git a/python/ql/test/experimental/dataflow/basic/test.py b/python/ql/test/experimental/dataflow/basic/test.py new file mode 100644 index 000000000000..416467d9dc9d --- /dev/null +++ b/python/ql/test/experimental/dataflow/basic/test.py @@ -0,0 +1,7 @@ +def obfuscated_id(x): + y = x + z = y + return z + +a = 42 +b = obfuscated_id(a) diff --git a/python/ql/test/experimental/dataflow/consistency/dataflow-consistency.expected b/python/ql/test/experimental/dataflow/consistency/dataflow-consistency.expected new file mode 100644 index 000000000000..eaf7a166e236 --- /dev/null +++ b/python/ql/test/experimental/dataflow/consistency/dataflow-consistency.expected @@ -0,0 +1,103 @@ +uniqueEnclosingCallable +| test.py:0:0:0:0 | Exit node for Module test | Node should have one enclosing callable but has 0. | +| test.py:0:0:0:0 | GSSA Variable __name__ | Node should have one enclosing callable but has 0. | +| test.py:0:0:0:0 | GSSA Variable __package__ | Node should have one enclosing callable but has 0. | +| test.py:0:0:0:0 | GSSA Variable test23 | Node should have one enclosing callable but has 0. | +| test.py:0:0:0:0 | GSSA Variable test24 | Node should have one enclosing callable but has 0. | +| test.py:0:0:0:0 | GSSA Variable test_truth | Node should have one enclosing callable but has 0. | +| test.py:0:0:0:0 | GSSA Variable test_update_extend | Node should have one enclosing callable but has 0. | +| test.py:0:0:0:0 | SSA variable $ | Node should have one enclosing callable but has 0. | +| test.py:6:1:6:12 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | +| test.py:6:5:6:9 | GSSA Variable test1 | Node should have one enclosing callable but has 0. | +| test.py:9:1:9:12 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | +| test.py:9:5:9:9 | GSSA Variable test2 | Node should have one enclosing callable but has 0. | +| test.py:13:1:13:13 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | +| test.py:13:5:13:10 | GSSA Variable source | Node should have one enclosing callable but has 0. | +| test.py:16:1:16:14 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | +| test.py:16:5:16:8 | GSSA Variable sink | Node should have one enclosing callable but has 0. | +| test.py:19:1:19:12 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | +| test.py:19:5:19:9 | GSSA Variable test3 | Node should have one enclosing callable but has 0. | +| test.py:23:1:23:12 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | +| test.py:23:5:23:9 | GSSA Variable test4 | Node should have one enclosing callable but has 0. | +| test.py:27:1:27:12 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | +| test.py:27:5:27:9 | GSSA Variable test5 | Node should have one enclosing callable but has 0. | +| test.py:31:1:31:16 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | +| test.py:31:5:31:9 | GSSA Variable test6 | Node should have one enclosing callable but has 0. | +| test.py:39:1:39:16 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | +| test.py:39:5:39:9 | GSSA Variable test7 | Node should have one enclosing callable but has 0. | +| test.py:47:1:47:17 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | +| test.py:47:5:47:11 | GSSA Variable source2 | Node should have one enclosing callable but has 0. | +| test.py:50:1:50:15 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | +| test.py:50:5:50:9 | GSSA Variable sink2 | Node should have one enclosing callable but has 0. | +| test.py:53:1:53:21 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | +| test.py:53:5:53:9 | GSSA Variable sink3 | Node should have one enclosing callable but has 0. | +| test.py:57:1:57:16 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | +| test.py:57:5:57:9 | GSSA Variable test8 | Node should have one enclosing callable but has 0. | +| test.py:62:1:62:16 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | +| test.py:62:5:62:9 | GSSA Variable test9 | Node should have one enclosing callable but has 0. | +| test.py:69:1:69:17 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | +| test.py:69:5:69:10 | GSSA Variable test10 | Node should have one enclosing callable but has 0. | +| test.py:76:1:76:13 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | +| test.py:76:5:76:7 | GSSA Variable hub | Node should have one enclosing callable but has 0. | +| test.py:79:1:79:13 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | +| test.py:79:5:79:10 | GSSA Variable test11 | Node should have one enclosing callable but has 0. | +| test.py:84:1:84:13 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | +| test.py:84:5:84:10 | GSSA Variable test12 | Node should have one enclosing callable but has 0. | +| test.py:89:8:89:13 | ControlFlowNode for ImportExpr | Node should have one enclosing callable but has 0. | +| test.py:89:8:89:13 | GSSA Variable module | Node should have one enclosing callable but has 0. | +| test.py:91:1:91:13 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | +| test.py:91:5:91:10 | GSSA Variable test13 | Node should have one enclosing callable but has 0. | +| test.py:95:1:95:13 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | +| test.py:95:5:95:10 | GSSA Variable test14 | Node should have one enclosing callable but has 0. | +| test.py:99:1:99:13 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | +| test.py:99:5:99:10 | GSSA Variable test15 | Node should have one enclosing callable but has 0. | +| test.py:103:1:103:13 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | +| test.py:103:5:103:10 | GSSA Variable test16 | Node should have one enclosing callable but has 0. | +| test.py:108:1:108:17 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | +| test.py:108:5:108:10 | GSSA Variable test20 | Node should have one enclosing callable but has 0. | +| test.py:118:1:118:17 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | +| test.py:118:5:118:10 | GSSA Variable test21 | Node should have one enclosing callable but has 0. | +| test.py:128:1:128:17 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | +| test.py:128:5:128:10 | GSSA Variable test22 | Node should have one enclosing callable but has 0. | +| test.py:139:20:139:38 | ControlFlowNode for ImportMember | Node should have one enclosing callable but has 0. | +| test.py:139:33:139:38 | GSSA Variable unsafe | Node should have one enclosing callable but has 0. | +| test.py:140:1:140:12 | ControlFlowNode for SINK() | Node should have one enclosing callable but has 0. | +| test.py:140:1:140:12 | GSSA Variable unsafe | Node should have one enclosing callable but has 0. | +| test.py:140:6:140:11 | ControlFlowNode for unsafe | Node should have one enclosing callable but has 0. | +| test.py:142:1:142:13 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | +| test.py:142:5:142:10 | GSSA Variable test23 | Node should have one enclosing callable but has 0. | +| test.py:146:1:146:13 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | +| test.py:146:5:146:10 | GSSA Variable test24 | Node should have one enclosing callable but has 0. | +| test.py:151:1:151:29 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | +| test.py:151:5:151:22 | GSSA Variable test_update_extend | Node should have one enclosing callable but has 0. | +| test.py:161:1:161:17 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. | +| test.py:161:5:161:14 | GSSA Variable test_truth | Node should have one enclosing callable but has 0. | +uniqueType +uniqueNodeLocation +missingLocation +uniqueNodeToString +missingToString +parameterCallable +localFlowIsLocal +compatibleTypesReflexive +unreachableNodeCCtx +localCallNodes +postIsNotPre +postHasUniquePre +uniquePostUpdate +postIsInSameCallable +reverseRead +storeIsPostUpdate +argHasPostUpdate +| test.py:25:10:25:10 | ControlFlowNode for t | ArgumentNode is missing PostUpdateNode. | +| test.py:29:10:29:10 | ControlFlowNode for t | ArgumentNode is missing PostUpdateNode. | +| test.py:48:19:48:21 | ControlFlowNode for arg | ArgumentNode is missing PostUpdateNode. | +| test.py:51:10:51:12 | ControlFlowNode for arg | ArgumentNode is missing PostUpdateNode. | +| test.py:55:14:55:16 | ControlFlowNode for arg | ArgumentNode is missing PostUpdateNode. | +| test.py:59:11:59:11 | ControlFlowNode for t | ArgumentNode is missing PostUpdateNode. | +| test.py:67:11:67:14 | ControlFlowNode for cond | ArgumentNode is missing PostUpdateNode. | +| test.py:67:17:67:17 | ControlFlowNode for t | ArgumentNode is missing PostUpdateNode. | +| test.py:74:11:74:14 | ControlFlowNode for cond | ArgumentNode is missing PostUpdateNode. | +| test.py:74:17:74:17 | ControlFlowNode for t | ArgumentNode is missing PostUpdateNode. | +| test.py:81:13:81:13 | ControlFlowNode for t | ArgumentNode is missing PostUpdateNode. | +| test.py:86:13:86:13 | ControlFlowNode for t | ArgumentNode is missing PostUpdateNode. | diff --git a/python/ql/test/experimental/dataflow/consistency/dataflow-consistency.ql b/python/ql/test/experimental/dataflow/consistency/dataflow-consistency.ql new file mode 100644 index 000000000000..6cf01e722ece --- /dev/null +++ b/python/ql/test/experimental/dataflow/consistency/dataflow-consistency.ql @@ -0,0 +1 @@ +import experimental.dataflow.internal.DataFlowImplConsistency::Consistency diff --git a/python/ql/test/experimental/dataflow/consistency/test.py b/python/ql/test/experimental/dataflow/consistency/test.py new file mode 100644 index 000000000000..22c5e2e0fc79 --- /dev/null +++ b/python/ql/test/experimental/dataflow/consistency/test.py @@ -0,0 +1,171 @@ +# This is currently a copy of the integration tests. +# It should contain many syntactic constructs, so should +# perhaps be taken from coverage once that is done. +# (We might even put the consistency check in there.) + +def test1(): + SINK(SOURCE) + +def test2(): + s = SOURCE + SINK(s) + +def source(): + return SOURCE + +def sink(arg): + SINK(arg) + +def test3(): + t = source() + SINK(t) + +def test4(): + t = SOURCE + sink(t) + +def test5(): + t = source() + sink(t) + +def test6(cond): + if cond: + t = "Safe" + else: + t = SOURCE + if cond: + SINK(t) + +def test7(cond): + if cond: + t = SOURCE + else: + t = "Safe" + if cond: + SINK(t) + +def source2(arg): + return source(arg) + +def sink2(arg): + sink(arg) + +def sink3(cond, arg): + if cond: + sink(arg) + +def test8(cond): + t = source2() + sink2(t) + +#False positive +def test9(cond): + if cond: + t = "Safe" + else: + t = SOURCE + sink3(cond, t) + +def test10(cond): + if cond: + t = SOURCE + else: + t = "Safe" + sink3(cond, t) + +def hub(arg): + return arg + +def test11(): + t = SOURCE + t = hub(t) + SINK(t) + +def test12(): + t = "safe" + t = hub(t) + SINK(t) + +import module + +def test13(): + t = module.dangerous + SINK(t) + +def test14(): + t = module.safe + SINK(t) + +def test15(): + t = module.safe2 + SINK(t) + +def test16(): + t = module.dangerous_func() + SINK(t) + + +def test20(cond): + if cond: + t = CUSTOM_SOURCE + else: + t = SOURCE + if cond: + CUSTOM_SINK(t) + else: + SINK(t) + +def test21(cond): + if cond: + t = CUSTOM_SOURCE + else: + t = SOURCE + if not cond: + CUSTOM_SINK(t) + else: + SINK(t) + +def test22(cond): + if cond: + t = CUSTOM_SOURCE + else: + t = SOURCE + t = TAINT_FROM_ARG(t) + if cond: + CUSTOM_SINK(t) + else: + SINK(t) + +from module import dangerous as unsafe +SINK(unsafe) + +def test23(): + with SOURCE as t: + SINK(t) + +def test24(): + s = SOURCE + SANITIZE(s) + SINK(s) + +def test_update_extend(x, y): + l = [SOURCE] + d = {"key" : SOURCE} + x.extend(l) + y.update(d) + SINK(x[0]) + SINK(y["key"]) + l2 = list(l) + d2 = dict(d) + +def test_truth(): + t = SOURCE + if t: + SINK(t) + else: + SINK(t) # Regression: FP here + if not t: + SINK(t) # Regression: FP here + else: + SINK(t) + diff --git a/python/ql/test/experimental/dataflow/coverage/dataflow.expected b/python/ql/test/experimental/dataflow/coverage/dataflow.expected new file mode 100644 index 000000000000..26384d0e28c7 --- /dev/null +++ b/python/ql/test/experimental/dataflow/coverage/dataflow.expected @@ -0,0 +1,6 @@ +| test.py:20:9:20:14 | ControlFlowNode for SOURCE | test.py:21:10:21:10 | ControlFlowNode for x | +| test.py:25:9:25:16 | ControlFlowNode for Str | test.py:26:10:26:10 | ControlFlowNode for x | +| test.py:29:9:29:17 | ControlFlowNode for Str | test.py:30:10:30:10 | ControlFlowNode for x | +| test.py:33:9:33:10 | ControlFlowNode for IntegerLiteral | test.py:34:10:34:10 | ControlFlowNode for x | +| test.py:37:9:37:12 | ControlFlowNode for FloatLiteral | test.py:38:10:38:10 | ControlFlowNode for x | +| test.py:46:10:46:15 | ControlFlowNode for SOURCE | test.py:47:10:47:10 | ControlFlowNode for x | diff --git a/python/ql/test/experimental/dataflow/coverage/dataflow.ql b/python/ql/test/experimental/dataflow/coverage/dataflow.ql new file mode 100644 index 000000000000..2d6e71ea6797 --- /dev/null +++ b/python/ql/test/experimental/dataflow/coverage/dataflow.ql @@ -0,0 +1,9 @@ +import experimental.dataflow.testConfig + +from + DataFlow::Node source, + DataFlow::Node sink +where + exists(TestConfiguration cfg | cfg.hasFlow(source, sink)) +select + source, sink diff --git a/python/ql/test/experimental/dataflow/coverage/localFlow.expected b/python/ql/test/experimental/dataflow/coverage/localFlow.expected new file mode 100644 index 000000000000..9ee9bddb6696 --- /dev/null +++ b/python/ql/test/experimental/dataflow/coverage/localFlow.expected @@ -0,0 +1,7 @@ +| test.py:13:5:13:5 | SSA variable x | test.py:12:1:12:33 | Exit node for Function test_tuple_with_local_flow | +| test.py:13:5:13:5 | SSA variable x | test.py:14:9:14:9 | ControlFlowNode for x | +| test.py:13:10:13:18 | ControlFlowNode for Tuple | test.py:13:5:13:5 | SSA variable x | +| test.py:14:5:14:5 | SSA variable y | test.py:15:5:15:11 | SSA variable y | +| test.py:14:5:14:5 | SSA variable y | test.py:15:10:15:10 | ControlFlowNode for y | +| test.py:14:9:14:12 | ControlFlowNode for Subscript | test.py:14:5:14:5 | SSA variable y | +| test.py:15:5:15:11 | SSA variable y | test.py:12:1:12:33 | Exit node for Function test_tuple_with_local_flow | diff --git a/python/ql/test/experimental/dataflow/coverage/localFlow.ql b/python/ql/test/experimental/dataflow/coverage/localFlow.ql new file mode 100644 index 000000000000..1b413bedeca3 --- /dev/null +++ b/python/ql/test/experimental/dataflow/coverage/localFlow.ql @@ -0,0 +1,8 @@ +import python +import experimental.dataflow.DataFlow + +from DataFlow::Node nodeFrom, DataFlow::Node nodeTo +where + DataFlow::localFlowStep(nodeFrom, nodeTo) and + nodeFrom.getEnclosingCallable().getName().matches("%\\_with\\_local\\_flow") +select nodeFrom, nodeTo diff --git a/python/ql/test/experimental/dataflow/coverage/test.py b/python/ql/test/experimental/dataflow/coverage/test.py new file mode 100644 index 000000000000..31634d7426b8 --- /dev/null +++ b/python/ql/test/experimental/dataflow/coverage/test.py @@ -0,0 +1,131 @@ +# This should cover all the syntactical constructs that we hope to support +# Intended sources should be the variable `SOURCE` and intended sinks should be +# arguments to the function `SINK` (see python/ql/test/experimental/dataflow/testConfig.qll). +# +# Functions whose name ends with "_with_local_flow" will also be tested for local flow. + +# These are included so that we can easily evaluate the test code +SOURCE = "source" +def SINK(x): + print(x) + +def test_tuple_with_local_flow(): + x = (3, SOURCE) + y = x[1] + SINK(y) + +# List taken from https://docs.python.org/3/reference/expressions.html +# 6.2.1. Identifiers (Names) +def test_names(): + x = SOURCE + SINK(x) + +# 6.2.2. Literals +def test_string_literal(): + x = "source" + SINK(x) + +def test_bytes_literal(): + x = b"source" + SINK(x) + +def test_integer_literal(): + x = 42 + SINK(x) + +def test_floatnumber_literal(): + x = 42.0 + SINK(x) + +def test_imagnumber_literal(): + x = 42j + SINK(x) + +# 6.2.3. Parenthesized forms +def test_parenthesized_form(): + x = (SOURCE) + SINK(x) + +# 6.2.5. List displays +def test_list_display(): + x = [SOURCE] + SINK(x[0]) + +def test_list_comprehension(): + x = [SOURCE for y in [3]] + SINK(x[0]) + +def test_nested_list_display(): + x = [* [SOURCE]] + SINK(x[0]) + +# 6.2.6. Set displays +def test_set_display(): + x = {SOURCE} + SINK(x.pop()) + +def test_set_comprehension(): + x = {SOURCE for y in [3]} + SINK(x.pop()) + +def test_nested_set_display(): + x = {* {SOURCE}} + SINK(x.pop()) + +# 6.2.7. Dictionary displays +def test_dict_display(): + x = {"s": SOURCE} + SINK(x["s"]) + +def test_dict_comprehension(): + x = {y: SOURCE for y in ["s"]} + SINK(x["s"]) + +def test_nested_dict_display(): + x = {** {"s": SOURCE}} + SINK(x["s"]) + +# 6.2.8. Generator expressions +def test_generator(): + x = (SOURCE for y in [3]) + SINK([*x][0]) + +# List taken from https://docs.python.org/3/reference/expressions.html +# 6. Expressions +# 6.1. Arithmetic conversions +# 6.2. Atoms +# 6.2.1. Identifiers (Names) +# 6.2.2. Literals +# 6.2.3. Parenthesized forms +# 6.2.4. Displays for lists, sets and dictionaries +# 6.2.5. List displays +# 6.2.6. Set displays +# 6.2.7. Dictionary displays +# 6.2.8. Generator expressions +# 6.2.9. Yield expressions +# 6.2.9.1. Generator-iterator methods +# 6.2.9.2. Examples +# 6.2.9.3. Asynchronous generator functions +# 6.2.9.4. Asynchronous generator-iterator methods +# 6.3. Primaries +# 6.3.1. Attribute references +# 6.3.2. Subscriptions +# 6.3.3. Slicings +# 6.3.4. Calls +# 6.4. Await expression +# 6.5. The power operator +# 6.6. Unary arithmetic and bitwise operations +# 6.7. Binary arithmetic operations +# 6.8. Shifting operations +# 6.9. Binary bitwise operations +# 6.10. Comparisons +# 6.10.1. Value comparisons +# 6.10.2. Membership test operations +# 6.10.3. Identity comparisons +# 6.11. Boolean operations +# 6.12. Assignment expressions +# 6.13. Conditional expressions +# 6.14. Lambdas +# 6.15. Expression lists +# 6.16. Evaluation order +# 6.17. Operator precedence diff --git a/python/ql/test/experimental/dataflow/regression/dataflow.expected b/python/ql/test/experimental/dataflow/regression/dataflow.expected new file mode 100644 index 000000000000..0d5dca4d4d13 --- /dev/null +++ b/python/ql/test/experimental/dataflow/regression/dataflow.expected @@ -0,0 +1,16 @@ +| test.py:3:10:3:15 | ControlFlowNode for SOURCE | test.py:3:10:3:15 | ControlFlowNode for SOURCE | +| test.py:6:9:6:14 | ControlFlowNode for SOURCE | test.py:7:10:7:10 | ControlFlowNode for s | +| test.py:10:12:10:17 | ControlFlowNode for SOURCE | test.py:13:10:13:12 | ControlFlowNode for arg | +| test.py:10:12:10:17 | ControlFlowNode for SOURCE | test.py:17:10:17:10 | ControlFlowNode for t | +| test.py:20:9:20:14 | ControlFlowNode for SOURCE | test.py:13:10:13:12 | ControlFlowNode for arg | +| test.py:37:13:37:18 | ControlFlowNode for SOURCE | test.py:41:14:41:14 | ControlFlowNode for t | +| test.py:62:13:62:18 | ControlFlowNode for SOURCE | test.py:13:10:13:12 | ControlFlowNode for arg | +| test.py:67:13:67:18 | ControlFlowNode for SOURCE | test.py:13:10:13:12 | ControlFlowNode for arg | +| test.py:76:9:76:14 | ControlFlowNode for SOURCE | test.py:78:10:78:10 | ControlFlowNode for t | +| test.py:108:13:108:18 | ControlFlowNode for SOURCE | test.py:112:14:112:14 | ControlFlowNode for t | +| test.py:139:10:139:15 | ControlFlowNode for SOURCE | test.py:140:14:140:14 | ControlFlowNode for t | +| test.py:143:9:143:14 | ControlFlowNode for SOURCE | test.py:145:10:145:10 | ControlFlowNode for s | +| test.py:158:9:158:14 | ControlFlowNode for SOURCE | test.py:160:14:160:14 | ControlFlowNode for t | +| test.py:158:9:158:14 | ControlFlowNode for SOURCE | test.py:162:14:162:14 | ControlFlowNode for t | +| test.py:158:9:158:14 | ControlFlowNode for SOURCE | test.py:164:14:164:14 | ControlFlowNode for t | +| test.py:158:9:158:14 | ControlFlowNode for SOURCE | test.py:166:14:166:14 | ControlFlowNode for t | diff --git a/python/ql/test/experimental/dataflow/regression/dataflow.ql b/python/ql/test/experimental/dataflow/regression/dataflow.ql new file mode 100644 index 000000000000..066a42b2c1c9 --- /dev/null +++ b/python/ql/test/experimental/dataflow/regression/dataflow.ql @@ -0,0 +1,16 @@ +/** + * This should be compared to + * python/ql/test/library-tests/taint/dataflow/Dataflow.ql + * A first goal is to have identical results; after that we + * hope to remove the false positive. + */ + +import experimental.dataflow.testConfig + +from + DataFlow::Node source, + DataFlow::Node sink +where + exists(TestConfiguration cfg | cfg.hasFlow(source, sink)) +select + source, sink diff --git a/python/ql/test/experimental/dataflow/regression/test.py b/python/ql/test/experimental/dataflow/regression/test.py new file mode 100644 index 000000000000..233c010a5bae --- /dev/null +++ b/python/ql/test/experimental/dataflow/regression/test.py @@ -0,0 +1,167 @@ + +def test1(): + SINK(SOURCE) + +def test2(): + s = SOURCE + SINK(s) + +def source(): + return SOURCE + +def sink(arg): + SINK(arg) + +def test3(): + t = source() + SINK(t) + +def test4(): + t = SOURCE + sink(t) + +def test5(): + t = source() + sink(t) + +def test6(cond): + if cond: + t = "Safe" + else: + t = SOURCE + if cond: + SINK(t) + +def test7(cond): + if cond: + t = SOURCE + else: + t = "Safe" + if cond: + SINK(t) + +def source2(arg): + return source(arg) + +def sink2(arg): + sink(arg) + +def sink3(cond, arg): + if cond: + sink(arg) + +def test8(cond): + t = source2() + sink2(t) + +#False positive +def test9(cond): + if cond: + t = "Safe" + else: + t = SOURCE + sink3(cond, t) + +def test10(cond): + if cond: + t = SOURCE + else: + t = "Safe" + sink3(cond, t) + +def hub(arg): + return arg + +def test11(): + t = SOURCE + t = hub(t) + SINK(t) + +def test12(): + t = "safe" + t = hub(t) + SINK(t) + +import module + +def test13(): + t = module.dangerous + SINK(t) + +def test14(): + t = module.safe + SINK(t) + +def test15(): + t = module.safe2 + SINK(t) + +def test16(): + t = module.dangerous_func() + SINK(t) + + +def test20(cond): + if cond: + t = CUSTOM_SOURCE + else: + t = SOURCE + if cond: + CUSTOM_SINK(t) + else: + SINK(t) + +def test21(cond): + if cond: + t = CUSTOM_SOURCE + else: + t = SOURCE + if not cond: + CUSTOM_SINK(t) + else: + SINK(t) + +def test22(cond): + if cond: + t = CUSTOM_SOURCE + else: + t = SOURCE + t = TAINT_FROM_ARG(t) + if cond: + CUSTOM_SINK(t) + else: + SINK(t) + +from module import dangerous as unsafe +SINK(unsafe) + +def test23(): + with SOURCE as t: + SINK(t) + +def test24(): + s = SOURCE + SANITIZE(s) + SINK(s) + +def test_update_extend(x, y): + l = [SOURCE] + d = {"key" : SOURCE} + x.extend(l) + y.update(d) + SINK(x[0]) + SINK(y["key"]) + l2 = list(l) + d2 = dict(d) + +def test_truth(): + t = SOURCE + if t: + SINK(t) + else: + SINK(t) # Regression: FP here + if not t: + SINK(t) # Regression: FP here + else: + SINK(t) + diff --git a/python/ql/test/experimental/dataflow/testConfig.qll b/python/ql/test/experimental/dataflow/testConfig.qll new file mode 100644 index 000000000000..8d89991d8c28 --- /dev/null +++ b/python/ql/test/experimental/dataflow/testConfig.qll @@ -0,0 +1,45 @@ +/** + * Configuration to test selected data flow + * Sources in the source code are denoted by the special name `SOURCE`, + * and sinks are denoted by arguments to the special function `SINK`. + * For example, given the test code + * ```python + * def test(): + * s = SOURCE + * SINK(s) + * ``` + * `SOURCE` will be a source and the second occurance of `s` will be a sink. + * + * In order to test literals, alternative sources are defined for each type: + * + * for | use + * ---------- + * string | `"source"` + * integer | `42` + * float | `42.0` + * complex | `42j` (not supported yet) + */ + +import experimental.dataflow.DataFlow + +class TestConfiguration extends DataFlow::Configuration { + TestConfiguration() { this = "TestConfiguration" } + + override predicate isSource(DataFlow::Node node) { + node.(DataFlow::CfgNode).getNode().(NameNode).getId() = "SOURCE" + or + node.(DataFlow::CfgNode).getNode().getNode().(StrConst).getS() = "source" + or + node.(DataFlow::CfgNode).getNode().getNode().(IntegerLiteral).getN() = "42" + or + node.(DataFlow::CfgNode).getNode().getNode().(FloatLiteral).getN() = "42.0" + // No support for complex numbers + } + + override predicate isSink(DataFlow::Node node) { + exists(CallNode call | + call.getFunction().(NameNode).getId() in ["SINK", "SINK_F"] and + node.(DataFlow::CfgNode).getNode() = call.getAnArg() + ) + } +}