Skip to content

Experiment: Make all data flow incremental #20028

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions java/ql/lib/qlpack.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,4 @@ dataExtensions:
- ext/generated/*.model.yml
- ext/experimental/*.model.yml
warnOnImplicitThis: true
compileForOverlayEval: true
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ module;

private import semmle.code.Location
private import codeql.dataflow.DataFlow
private import semmle.code.java.Overlay

module Private {
import DataFlowPrivate
Expand All @@ -29,4 +30,6 @@ module JavaDataFlow implements InputSig<Location> {
predicate mayBenefitFromCallContext = Private::mayBenefitFromCallContext/1;

predicate viableImplInCallContext = Private::viableImplInCallContext/2;

predicate isEvaluatingInOverlay = isOverlay/0;
}
12 changes: 12 additions & 0 deletions shared/dataflow/codeql/dataflow/DataFlow.qll
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,18 @@ signature module InputSig<LocationSig Location> {

/** Holds if `fieldFlowBranchLimit` should be ignored for flow going into/out of `c`. */
default predicate ignoreFieldFlowBranchLimit(DataFlowCallable c) { none() }

/**
* Holds if the evaluator is currently evaluating with an overlay. The
* implementation of this predicate needs to be `overlay[local]`. For a
* language with no overlay support, `none()` is a valid implementation.
*
* When called from a local predicate, this predicate holds if we are in the
* overlay-only local evaluation. When called from a global predicate, this
* predicate holds if we are evaluating globally with overlay and base both
* visible.
*/
default predicate isEvaluatingInOverlay() { none() }
}

module Configs<LocationSig Location, InputSig<Location> Lang> {
Expand Down
63 changes: 60 additions & 3 deletions shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll
Original file line number Diff line number Diff line change
Expand Up @@ -3376,24 +3376,81 @@ module MakeImpl<LocationSig Location, InputSig<Location> Lang> {
import S6

/**
* Holds if data can flow from `source` to `sink`.
* Holds if data can flow from `source` to `sink`. When running in overlay
* incremental mode on a supported language, this predicate only has
* results where either the source or sink is in the overlay database.
*/
predicate flow(Node source, Node sink) {
private predicate flowIncremental(Node source, Node sink) {
exists(PathNode source0, PathNode sink0 |
flowPath(source0, sink0) and source0.getNode() = source and sink0.getNode() = sink
)
}

overlay[local]
private predicate flowLocal(Node source, Node sink) =
forceLocal(flowIncremental/2)(source, sink)

/**
* Holds if data can flow from `source` to `sink`.
*/
predicate flow(Node source, Node sink) {
flowIncremental(source, sink)
or
flowLocal(source, sink)
}

/**
* Holds if data can flow from some source to `sink`. When running in
* overlay incremental mode on a supported language, this predicate only
* has results where either the source or sink is in the overlay database.
*/
private predicate flowToIncremental(Node sink) {
exists(PathNode n | n.isSink() and n.getNode() = sink)
}

overlay[local]
private predicate flowToLocal(Node sink) = forceLocal(flowToIncremental/1)(sink)

/**
* Holds if data can flow from some source to `sink`.
*/
predicate flowTo(Node sink) { exists(PathNode n | n.isSink() and n.getNode() = sink) }
predicate flowTo(Node sink) {
flowToIncremental(sink)
or
flowToLocal(sink)
}

/**
* Holds if data can flow from some source to `sink`.
*/
predicate flowToExpr(Expr sink) { flowTo(exprNode(sink)) }

/**
* Holds if data can flow from `source` to some sink. When running in
* overlay incremental mode on a supported language, this predicate only
* has results where either the source or sink is in the overlay database.
*/
private predicate flowFromIncremental(Node source) {
exists(PathNode n | n.isSource() and n.getNode() = source)
}

overlay[local]
private predicate flowFromLocal(Node source) = forceLocal(flowFromIncremental/1)(source)

/**
* Holds if data can flow from `source` to some sink.
*/
predicate flowFrom(Node source) {
flowFromIncremental(source)
or
flowFromLocal(source)
}

/**
* Holds if data can flow from `source` to some sink.
*/
predicate flowFromExpr(Expr source) { flowFrom(exprNode(source)) }

/**
* INTERNAL: Only for debugging.
*
Expand Down
86 changes: 79 additions & 7 deletions shared/dataflow/codeql/dataflow/internal/DataFlowImplStage1.qll
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
* Provides an implementation of a fast initial pruning of global
* (interprocedural) data flow reachability (Stage 1).
*/
overlay[local?]
overlay[local?] // when this is removed, put `overlay[local?]` on `isOverlayNode`.
module;

private import codeql.util.Unit
Expand Down Expand Up @@ -129,23 +129,95 @@ module MakeImplStage1<LocationSig Location, InputSig<Location> Lang> {

private module AlertFiltering = AlertFilteringImpl<Location>;

/**
* Holds if the given node is visible in overlay-only local evaluation.
*
* This predicate needs to be `overlay[local?]`, either directly or
* through annotations from an outer scope. If `Node` is global for the
* language under analysis, then every node is considered an overlay
* node, which means there will effectively be no overlay-based
* filtering of sources and sinks.
*/
private predicate isOverlayNode(Node node) {
isEvaluatingInOverlay() and
// Any local node is an overlay node if we are evaluating in overlay mode
exists(node)
}

/**
* Holds if `node` comes from a file that was in the diff for which we
* are producing results.
*/
overlay[global]
private predicate isDiffFileNode(Node node) {
exists(string filePath |
node.getLocation().hasLocationInfo(filePath, _, _, _, _) and
AlertFiltering::fileIsInDiff(filePath)
)
}

overlay[global]
pragma[nomagic]
private predicate isFilteredSource(Node source) {
Config::isSource(source, _) and
if Config::observeDiffInformedIncrementalMode()
then AlertFiltering::filterByLocation(Config::getASelectedSourceLocation(source))
else any()
// Data flow is always incremental in one of two ways.
// 1. If the configuration is diff-informed and diff information is
// available, we filter to only include nodes in the diff, which
// gives the smallest set of nodes.
// 2. If not, in global evaluation with overlay, we filter to only
// include nodes from files in the overlay or the diff; flow from
// other nodes will be added back later. There can be two reasons
// why we are in this case:
// 1. This could be the primary configuration for a query that
// hasn't yet become diff-informed. In that case, the
// `getASelectedSourceLocation` information is probably just the
// default, and it's a fairly safe overapproximation to
// effectively expand to all nodes in the file (via
// `isDiffFileNode`).
// 2. This could be a secondary configuration, like a helper
// configuration for finding sources or sinks of a primary
// configuration. In that case, the results of this configuration
// are typically in the same file as the final alert.
if
Config::observeDiffInformedIncrementalMode() and
AlertFiltering::diffInformationAvailable()
then AlertFiltering::locationIsInDiff(Config::getASelectedSourceLocation(source))
else (
// If we are in base-only global evaluation, do not filter out any sources.
not isEvaluatingInOverlay()
or
// If we are in global evaluation with an overlay present, restrict
// sources to those visible in the overlay or
isOverlayNode(source)
or
// those from files in the diff. The diff is a subset of the overlay
// in the common case, but this is not guaranteed. Including the diff here
// ensures that we re-evaluate flow that passes from a file in the
// diff (but in the base), through a file in the overlay with
// possibly important changes, back to the base.
isDiffFileNode(source)
)
}

overlay[global]
pragma[nomagic]
private predicate isFilteredSink(Node sink) {
(
Config::isSink(sink, _) or
Config::isSink(sink)
) and
if Config::observeDiffInformedIncrementalMode()
then AlertFiltering::filterByLocation(Config::getASelectedSinkLocation(sink))
else any()
// See the comments in `isFilteredSource` for the reasoning behind the following.
if
Config::observeDiffInformedIncrementalMode() and
AlertFiltering::diffInformationAvailable()
then AlertFiltering::locationIsInDiff(Config::getASelectedSinkLocation(sink))
else (
not isEvaluatingInOverlay()
or
isOverlayNode(sink)
or
isDiffFileNode(sink)
)
}

private predicate hasFilteredSource() { isFilteredSource(_) }
Expand Down
26 changes: 25 additions & 1 deletion shared/util/codeql/util/AlertFiltering.qll
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,21 @@ module AlertFilteringImpl<LocationSig Location> {
)
}

/** Holds if diff information is available in this evaluation. */
predicate diffInformationAvailable() {
restrictAlertsTo(_, _, _) or restrictAlertsToExactLocation(_, _, _, _, _)
}

/**
* Holds if diff information is available, and `filePath` is in the diff
* range.
*/
predicate fileIsInDiff(string filePath) {
restrictAlertsTo(filePath, _, _)
or
restrictAlertsToExactLocation(filePath, _, _, _, _)
}

/**
* Holds if the given location intersects the diff range. When that is the
* case, ensuring that alerts mentioning this location are included in the
Expand All @@ -103,8 +118,17 @@ module AlertFilteringImpl<LocationSig Location> {
*/
bindingset[location]
predicate filterByLocation(Location location) {
not restrictAlertsTo(_, _, _) and not restrictAlertsToExactLocation(_, _, _, _, _)
not diffInformationAvailable()
or
locationIsInDiff(location)
}

/**
* Like `filterByLocation`, except that if there is no diff range, this
* predicate never holds.
*/
bindingset[location]
predicate locationIsInDiff(Location location) {
exists(string filePath |
restrictAlertsToEntireFile(filePath) and
location.hasLocationInfo(filePath, _, _, _, _)
Expand Down
Loading