diff --git a/java/ql/lib/qlpack.yml b/java/ql/lib/qlpack.yml index c7a6a045451e..c27a06994b2f 100644 --- a/java/ql/lib/qlpack.yml +++ b/java/ql/lib/qlpack.yml @@ -23,3 +23,4 @@ dataExtensions: - ext/generated/*.model.yml - ext/experimental/*.model.yml warnOnImplicitThis: true +compileForOverlayEval: true diff --git a/java/ql/lib/semmle/code/java/dataflow/internal/DataFlowImplSpecific.qll b/java/ql/lib/semmle/code/java/dataflow/internal/DataFlowImplSpecific.qll index 65034ee08b93..97f5020142e0 100644 --- a/java/ql/lib/semmle/code/java/dataflow/internal/DataFlowImplSpecific.qll +++ b/java/ql/lib/semmle/code/java/dataflow/internal/DataFlowImplSpecific.qll @@ -6,6 +6,7 @@ module; private import semmle.code.Location private import codeql.dataflow.DataFlow +private import semmle.code.java.Overlay module Private { import DataFlowPrivate @@ -29,4 +30,6 @@ module JavaDataFlow implements InputSig { predicate mayBenefitFromCallContext = Private::mayBenefitFromCallContext/1; predicate viableImplInCallContext = Private::viableImplInCallContext/2; + + predicate isEvaluatingInOverlay = isOverlay/0; } diff --git a/shared/dataflow/codeql/dataflow/DataFlow.qll b/shared/dataflow/codeql/dataflow/DataFlow.qll index 3483287e3b39..efcd54ff696d 100644 --- a/shared/dataflow/codeql/dataflow/DataFlow.qll +++ b/shared/dataflow/codeql/dataflow/DataFlow.qll @@ -349,6 +349,18 @@ signature module InputSig { /** Holds if `fieldFlowBranchLimit` should be ignored for flow going into/out of `c`. */ default predicate ignoreFieldFlowBranchLimit(DataFlowCallable c) { none() } + + /** + * Holds if the evaluator is currently evaluating with an overlay. The + * implementation of this predicate needs to be `overlay[local]`. For a + * language with no overlay support, `none()` is a valid implementation. + * + * When called from a local predicate, this predicate holds if we are in the + * overlay-only local evaluation. When called from a global predicate, this + * predicate holds if we are evaluating globally with overlay and base both + * visible. + */ + default predicate isEvaluatingInOverlay() { none() } } module Configs Lang> { diff --git a/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll b/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll index a7e0736432ac..f06faafcf964 100644 --- a/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll +++ b/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll @@ -3376,24 +3376,81 @@ module MakeImpl Lang> { import S6 /** - * Holds if data can flow from `source` to `sink`. + * Holds if data can flow from `source` to `sink`. When running in overlay + * incremental mode on a supported language, this predicate only has + * results where either the source or sink is in the overlay database. */ - predicate flow(Node source, Node sink) { + private predicate flowIncremental(Node source, Node sink) { exists(PathNode source0, PathNode sink0 | flowPath(source0, sink0) and source0.getNode() = source and sink0.getNode() = sink ) } + overlay[local] + private predicate flowLocal(Node source, Node sink) = + forceLocal(flowIncremental/2)(source, sink) + + /** + * Holds if data can flow from `source` to `sink`. + */ + predicate flow(Node source, Node sink) { + flowIncremental(source, sink) + or + flowLocal(source, sink) + } + + /** + * Holds if data can flow from some source to `sink`. When running in + * overlay incremental mode on a supported language, this predicate only + * has results where either the source or sink is in the overlay database. + */ + private predicate flowToIncremental(Node sink) { + exists(PathNode n | n.isSink() and n.getNode() = sink) + } + + overlay[local] + private predicate flowToLocal(Node sink) = forceLocal(flowToIncremental/1)(sink) + /** * Holds if data can flow from some source to `sink`. */ - predicate flowTo(Node sink) { exists(PathNode n | n.isSink() and n.getNode() = sink) } + predicate flowTo(Node sink) { + flowToIncremental(sink) + or + flowToLocal(sink) + } /** * Holds if data can flow from some source to `sink`. */ predicate flowToExpr(Expr sink) { flowTo(exprNode(sink)) } + /** + * Holds if data can flow from `source` to some sink. When running in + * overlay incremental mode on a supported language, this predicate only + * has results where either the source or sink is in the overlay database. + */ + private predicate flowFromIncremental(Node source) { + exists(PathNode n | n.isSource() and n.getNode() = source) + } + + overlay[local] + private predicate flowFromLocal(Node source) = forceLocal(flowFromIncremental/1)(source) + + /** + * Holds if data can flow from `source` to some sink. + */ + predicate flowFrom(Node source) { + flowFromIncremental(source) + or + flowFromLocal(source) + } + + /** + * Holds if data can flow from `source` to some sink. + */ + predicate flowFromExpr(Expr source) { flowFrom(exprNode(source)) } + /** * INTERNAL: Only for debugging. * diff --git a/shared/dataflow/codeql/dataflow/internal/DataFlowImplStage1.qll b/shared/dataflow/codeql/dataflow/internal/DataFlowImplStage1.qll index c7883df0de18..6e716b74bc33 100644 --- a/shared/dataflow/codeql/dataflow/internal/DataFlowImplStage1.qll +++ b/shared/dataflow/codeql/dataflow/internal/DataFlowImplStage1.qll @@ -4,7 +4,7 @@ * Provides an implementation of a fast initial pruning of global * (interprocedural) data flow reachability (Stage 1). */ -overlay[local?] +overlay[local?] // when this is removed, put `overlay[local?]` on `isOverlayNode`. module; private import codeql.util.Unit @@ -129,23 +129,95 @@ module MakeImplStage1 Lang> { private module AlertFiltering = AlertFilteringImpl; + /** + * Holds if the given node is visible in overlay-only local evaluation. + * + * This predicate needs to be `overlay[local?]`, either directly or + * through annotations from an outer scope. If `Node` is global for the + * language under analysis, then every node is considered an overlay + * node, which means there will effectively be no overlay-based + * filtering of sources and sinks. + */ + private predicate isOverlayNode(Node node) { + isEvaluatingInOverlay() and + // Any local node is an overlay node if we are evaluating in overlay mode + exists(node) + } + + /** + * Holds if `node` comes from a file that was in the diff for which we + * are producing results. + */ + overlay[global] + private predicate isDiffFileNode(Node node) { + exists(string filePath | + node.getLocation().hasLocationInfo(filePath, _, _, _, _) and + AlertFiltering::fileIsInDiff(filePath) + ) + } + + overlay[global] pragma[nomagic] private predicate isFilteredSource(Node source) { Config::isSource(source, _) and - if Config::observeDiffInformedIncrementalMode() - then AlertFiltering::filterByLocation(Config::getASelectedSourceLocation(source)) - else any() + // Data flow is always incremental in one of two ways. + // 1. If the configuration is diff-informed and diff information is + // available, we filter to only include nodes in the diff, which + // gives the smallest set of nodes. + // 2. If not, in global evaluation with overlay, we filter to only + // include nodes from files in the overlay or the diff; flow from + // other nodes will be added back later. There can be two reasons + // why we are in this case: + // 1. This could be the primary configuration for a query that + // hasn't yet become diff-informed. In that case, the + // `getASelectedSourceLocation` information is probably just the + // default, and it's a fairly safe overapproximation to + // effectively expand to all nodes in the file (via + // `isDiffFileNode`). + // 2. This could be a secondary configuration, like a helper + // configuration for finding sources or sinks of a primary + // configuration. In that case, the results of this configuration + // are typically in the same file as the final alert. + if + Config::observeDiffInformedIncrementalMode() and + AlertFiltering::diffInformationAvailable() + then AlertFiltering::locationIsInDiff(Config::getASelectedSourceLocation(source)) + else ( + // If we are in base-only global evaluation, do not filter out any sources. + not isEvaluatingInOverlay() + or + // If we are in global evaluation with an overlay present, restrict + // sources to those visible in the overlay or + isOverlayNode(source) + or + // those from files in the diff. The diff is a subset of the overlay + // in the common case, but this is not guaranteed. Including the diff here + // ensures that we re-evaluate flow that passes from a file in the + // diff (but in the base), through a file in the overlay with + // possibly important changes, back to the base. + isDiffFileNode(source) + ) } + overlay[global] pragma[nomagic] private predicate isFilteredSink(Node sink) { ( Config::isSink(sink, _) or Config::isSink(sink) ) and - if Config::observeDiffInformedIncrementalMode() - then AlertFiltering::filterByLocation(Config::getASelectedSinkLocation(sink)) - else any() + // See the comments in `isFilteredSource` for the reasoning behind the following. + if + Config::observeDiffInformedIncrementalMode() and + AlertFiltering::diffInformationAvailable() + then AlertFiltering::locationIsInDiff(Config::getASelectedSinkLocation(sink)) + else ( + not isEvaluatingInOverlay() + or + isOverlayNode(sink) + or + isDiffFileNode(sink) + ) } private predicate hasFilteredSource() { isFilteredSource(_) } diff --git a/shared/util/codeql/util/AlertFiltering.qll b/shared/util/codeql/util/AlertFiltering.qll index bfc9d901b2fa..aabd42c7dbb2 100644 --- a/shared/util/codeql/util/AlertFiltering.qll +++ b/shared/util/codeql/util/AlertFiltering.qll @@ -82,6 +82,21 @@ module AlertFilteringImpl { ) } + /** Holds if diff information is available in this evaluation. */ + predicate diffInformationAvailable() { + restrictAlertsTo(_, _, _) or restrictAlertsToExactLocation(_, _, _, _, _) + } + + /** + * Holds if diff information is available, and `filePath` is in the diff + * range. + */ + predicate fileIsInDiff(string filePath) { + restrictAlertsTo(filePath, _, _) + or + restrictAlertsToExactLocation(filePath, _, _, _, _) + } + /** * Holds if the given location intersects the diff range. When that is the * case, ensuring that alerts mentioning this location are included in the @@ -103,8 +118,17 @@ module AlertFilteringImpl { */ bindingset[location] predicate filterByLocation(Location location) { - not restrictAlertsTo(_, _, _) and not restrictAlertsToExactLocation(_, _, _, _, _) + not diffInformationAvailable() or + locationIsInDiff(location) + } + + /** + * Like `filterByLocation`, except that if there is no diff range, this + * predicate never holds. + */ + bindingset[location] + predicate locationIsInDiff(Location location) { exists(string filePath | restrictAlertsToEntireFile(filePath) and location.hasLocationInfo(filePath, _, _, _, _)