diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/AliasedFlow.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/AliasedFlow.qll new file mode 100644 index 000000000000..3bb8141c077c --- /dev/null +++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/AliasedFlow.qll @@ -0,0 +1,344 @@ +private import cpp as Cpp +private import SsaInternals as Ssa +private import codeql.ssa.Ssa as SsaImplCommon +private import DataFlowPrivate +private import DataFlowUtil as Public +private import DataFlowNodes as Nodes +private import semmle.code.cpp.ir.IR +private import semmle.code.cpp.ir.internal.IRCppLanguage + +private module SsaInput implements SsaImplCommon::InputSig { + import SsaInternalsCommon::InputSigCommon + + class SourceVariable = Ssa::SourceVariable; + + /** + * Holds if `instr` flows to the destination address of a `StoreInstruction` + * and flows from a read of some definition. + */ + private predicate fwd(Node1Impl n, int indirectionIndex) { + nodeHasInstruction1(n, any(VariableAddressInstruction vai), indirectionIndex) + or + exists(Node1Impl n0 | + fwd(n0, indirectionIndex) and + simpleLocalFlowStep1(n0, n, _) + ) + } + + /** + * Holds if `instr` flows to the destination address of a `StoreInstruction` + */ + private predicate revStore(Node1Impl n, int indirectionIndex, int k) { + fwd(pragma[only_bind_into](n), pragma[only_bind_into](indirectionIndex)) and + ( + indirectionIndex > k and + nodeHasOperand1(n, any(StoreInstruction store).getDestinationAddressOperand(), + indirectionIndex - k) + or + exists(Node1Impl n1 | + revStore(n1, pragma[only_bind_into](indirectionIndex), k) and + simpleLocalFlowStep1(n, n1, _) + ) + ) + } + + private newtype TStoreNode1Impl = + MkStoreNode1Impl(Node1Impl n, int indirectionIndex, int k) { revStore(n, indirectionIndex, k) } + + /** + * This predicate holds if + * ``` + * conversionFlow(instr1.getAUse(), instr2, _, false) + * ``` + * and both `instr1` and `instr2` are instructions on a path from a read of + * some 
definition to the destination address of a `StoreInstruction`. + */ + private predicate flowStoreStep(TStoreNode1Impl node1, TStoreNode1Impl node2) { + exists(Node1Impl n1, Node1Impl n2, int indirectionIndex, int k | + node1 = + MkStoreNode1Impl(n1, pragma[only_bind_into](indirectionIndex), pragma[only_bind_into](k)) and + node2 = + MkStoreNode1Impl(n2, pragma[only_bind_into](indirectionIndex), pragma[only_bind_into](k)) and + simpleLocalFlowStep1(n1, n2, _) + ) + } + + private predicate storeSink(TStoreNode1Impl sink) { + exists(Node1Impl n, int indirectionIndex, int k | + sink = MkStoreNode1Impl(n, indirectionIndex, k) and + // Subtract one because a store writes to the _indirection_ of the address operand + nodeHasOperand1(n, any(StoreInstruction store).getDestinationAddressOperand(), + indirectionIndex - k) + ) + } + + private predicate storeSource(TStoreNode1Impl source) { + exists(Node1Impl n, int indirectionIndex, int k | + source = MkStoreNode1Impl(n, indirectionIndex, k) and + nodeHasInstruction1(n, any(VariableAddressInstruction vai), indirectionIndex) + ) + } + + private predicate flowStorePlusImpl(TStoreNode1Impl node1, TStoreNode1Impl node2) = + doublyBoundedFastTC(flowStoreStep/2, storeSource/1, storeSink/1)(node1, node2) + + private predicate flowStoreStepTCImpl(TStoreNode1Impl node1, TStoreNode1Impl node2) { + storeSource(node1) and + storeSink(node2) and + ( + flowStorePlusImpl(node1, node2) + or + node1 = node2 + ) + } + + private predicate flowStoreStepTC(Node1Impl n1, Node1Impl n2, int indirectionIndex, int k) { + exists(TStoreNode1Impl node1, TStoreNode1Impl node2 | + node1 = + MkStoreNode1Impl(n1, pragma[only_bind_into](indirectionIndex), pragma[only_bind_into](k)) and + node2 = + MkStoreNode1Impl(n2, pragma[only_bind_into](indirectionIndex), pragma[only_bind_into](k)) and + flowStoreStepTCImpl(node1, node2) + ) + } + + /** + * Holds if `instr` flows to the destination address of a `StoreInstruction` + */ + private predicate revLoad(Node1Impl 
n, int indirectionIndex) { + fwd(pragma[only_bind_into](n), pragma[only_bind_into](indirectionIndex)) and + ( + nodeHasOperand1(n, _, indirectionIndex) + or + exists(Node1Impl n1 | + revLoad(n1, pragma[only_bind_into](indirectionIndex)) and + simpleLocalFlowStep1(n, n1, _) + ) + ) + } + + private newtype TLoadNode1Impl = + MkLoadNode1Impl(Node1Impl n, int indirectionIndex) { revLoad(n, indirectionIndex) } + + private predicate flowLoadStep(TLoadNode1Impl node1, TLoadNode1Impl node2) { + exists(Node1Impl n1, Node1Impl n2, int indirectionIndex | + node1 = MkLoadNode1Impl(n1, pragma[only_bind_into](indirectionIndex)) and + node2 = MkLoadNode1Impl(n2, pragma[only_bind_into](indirectionIndex)) and + simpleLocalFlowStep1(n1, n2, _) + ) + } + + private predicate loadSink(TLoadNode1Impl sink) { + exists(Node1Impl n, int indirectionIndex | + sink = MkLoadNode1Impl(n, indirectionIndex) and + nodeHasOperand1(n, _, indirectionIndex) + ) + } + + private predicate loadSource(TLoadNode1Impl source) { + exists(Node1Impl n, int indirectionIndex | + source = MkLoadNode1Impl(n, indirectionIndex) and + nodeHasInstruction1(n, any(VariableAddressInstruction vai), indirectionIndex) + ) + } + + private predicate flowLoadPlusImpl(TLoadNode1Impl node1, TLoadNode1Impl node2) = + doublyBoundedFastTC(flowLoadStep/2, loadSource/1, loadSink/1)(node1, node2) + + private predicate flowLoadStepTCImpl(TLoadNode1Impl node1, TLoadNode1Impl node2) { + loadSource(node1) and + loadSink(node2) and + ( + flowLoadPlusImpl(node1, node2) + or + node1 = node2 + ) + } + + private predicate flowLoadStepTC(Node1Impl n1, Node1Impl n2, int indirectionIndex) { + exists(TLoadNode1Impl node1, TLoadNode1Impl node2 | + node1 = MkLoadNode1Impl(n1, pragma[only_bind_into](indirectionIndex)) and + node2 = MkLoadNode1Impl(n2, pragma[only_bind_into](indirectionIndex)) and + flowLoadStepTCImpl(node1, node2) + ) + } + + /** + * Holds if the `i`'th instruction in `bb` writes to `v` through an alias. 
+ * `certain` is `true` if write is guaranteed to overwrite the entire + * allocation. + */ + additional predicate variableWrite( + BasicBlock bb, int i, SourceVariable sv, boolean certain, Node1Impl store + ) { + certain = true and + exists( + Node1Impl vai, VariableAddressInstruction vaiInstr, StoreInstruction storeInstr, int index, + Node1Impl dest, int k, Ssa::DefImpl def, int lower + | + flowStoreStepTC(vai, dest, index, k) and + nodeHasInstruction1(vai, vaiInstr, index) and + nodeHasOperand1(dest, storeInstr.getDestinationAddressOperand(), index - k) and + sv.getIRVariable() = vaiInstr.getIRVariable() and + lower = + pragma[only_bind_out](getMinIndirectionsForType(storeInstr + .getDestinationAddress() + .getResultType())) and + sv.getIndirection() = index + lower and + nodeHasInstruction1(store, storeInstr, index - k) and + def.getNode() = store and + def.hasIndexInBlock(bb, i) + ) + } + + predicate variableWrite(BasicBlock bb, int i, SourceVariable sv, boolean certain) { + variableWrite(bb, i, sv, certain, _) + } + + additional predicate variableRead( + BasicBlock bb, int i, SourceVariable sv, boolean certain, Node1Impl load + ) { + certain = true and + exists(Node1Impl vai, int index, VariableAddressInstruction vaiInstr, Ssa::UseImpl use | + flowLoadStepTC(vai, load, index) and + nodeHasInstruction1(vai, vaiInstr, index) and + sv.getIRVariable() = vaiInstr.getIRVariable() and + sv.getIndirection() = index and + use.getNode() = load and + use.hasIndexInBlock(bb, i) + ) + } + + predicate variableRead(BasicBlock bb, int i, SourceVariable sv, boolean certain) { + variableRead(bb, i, sv, certain, _) + } +} + +private module AliasedSsa = SsaImplCommon::Make; + +private newtype TAliasedNode = + TNode1(Node1Impl n) or + TPhiNode(AliasedSsa::DefinitionExt phi) { + phi instanceof AliasedSsa::PhiNode or + phi instanceof AliasedSsa::PhiReadNode + } + +abstract private class AliasedNode extends TAliasedNode { + abstract string toString(); + + Instruction asInstruction() 
{ none() } + + abstract Cpp::Function getFunction(); + + abstract predicate isGLValue(); + + abstract Cpp::Type getType(); + + abstract Cpp::Location getLocation(); +} + +class AliasedNodeImpl = AliasedNode; + +private class Node1 extends AliasedNode, TNode1 { + Node1Impl n; + + Node1() { this = TNode1(n) } + + Node1Impl getImpl() { result = n } + + final override string toString() { result = n.toString() } + + final override Instruction asInstruction() { result = n.asInstruction() } + + final override Cpp::Function getFunction() { result = n.getFunction() } + + final override predicate isGLValue() { n.isGLValue() } + + final override Cpp::Type getType() { result = n.getType() } + + final override Cpp::Location getLocation() { result = n.getLocation() } +} + +private class PhiNode extends AliasedNode, TPhiNode { + AliasedSsa::DefinitionExt phi; + + PhiNode() { this = TPhiNode(phi) } + + final override string toString() { result = phi.toString() } + + AliasedSsa::DefinitionExt getPhi() { result = phi } + + final override Cpp::Function getFunction() { result = phi.getBasicBlock().getEnclosingFunction() } + + final override predicate isGLValue() { phi.getSourceVariable().isGLValue() } + + final override Cpp::Type getType() { result = phi.getSourceVariable().getType() } + + final override Cpp::Location getLocation() { result = phi.getLocation() } +} + +class AliasedPhiNodeImpl = PhiNode; + +private predicate step(SsaInput::SourceVariable sv, IRBlock bb1, int i1, AliasedNode node2) { + exists(AliasedSsa::DefinitionExt def, Node1Impl load, IRBlock bb2, int i2 | + AliasedSsa::adjacentDefReadExt(def, sv, bb1, i1, bb2, i2) and + SsaInput::variableRead(bb2, i2, sv, _, load) and + TNode1(load) = node2 + ) +} + +private predicate access(SsaInput::SourceVariable sv, IRBlock bb, int i, AliasedNode node1) { + exists(Node1Impl n | node1 = TNode1(n) | + SsaInput::variableWrite(bb, i, sv, _, n) + or + SsaInput::variableRead(bb, i, sv, _, n) + ) + or + 
node1.(PhiNode).getPhi().definesAt(sv, bb, i, _) +} + +private predicate stepToPhi(SsaInput::SourceVariable sv, IRBlock bb, int i, PhiNode node) { + exists(AliasedSsa::DefinitionExt phi | + AliasedSsa::lastRefRedefExt(_, sv, bb, i, phi) and + node.getPhi() = phi + ) +} + +predicate into(Public::Node node1, TPhiNode node2) { + exists(Node1Impl n | + node1 = Nodes::TNode1(n) and + aliasedFlow(TNode1(n), node2) + ) +} + +predicate step1(Public::Node node1, Public::Node node2) { + exists(Node1Impl n1, Node1Impl n2 | + node1 = Nodes::TNode1(n1) and + node2 = Nodes::TNode1(n2) and + aliasedFlow(TNode1(n1), TNode1(n2)) + ) +} + +predicate step2(TPhiNode node1, TPhiNode node2) { aliasedFlow(node1, node2) } + +predicate out(TPhiNode node1, Public::Node node2) { + exists(Node1Impl n | + node2 = Nodes::TNode1(n) and + aliasedFlow(node1, TNode1(n)) + ) +} + +private predicate aliasedFlow(AliasedNode node1, AliasedNode node2) { + node1 != node2 and + ( + exists(IRBlock bb, int i, SsaInput::SourceVariable sv | + access(sv, bb, i, node1) and + step(sv, bb, i, node2) + ) + or + exists(IRBlock bb, int i, SsaInput::SourceVariable sv | + access(sv, bb, i, node1) and + stepToPhi(sv, bb, i, node2) + ) + ) +} diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowNodes.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowNodes.qll new file mode 100644 index 000000000000..66601d16010e --- /dev/null +++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowNodes.qll @@ -0,0 +1,38 @@ +private import cpp +private import DataFlowPrivate +private import semmle.code.cpp.ir.IR +private import DataFlowImplCommon as DataFlowImplCommon +private import SsaInternals as Ssa +private import semmle.code.cpp.dataflow.internal.FlowSummaryImpl as FlowSummaryImpl +private import AliasedFlow + +/** + * The IR dataflow graph consists of the following nodes: + * - `Node1`, which injects most instructions and operands directly into the + * dataflow graph, as well as indirections of 
these instructions and + * operands. + * - `VariableNode`, which is used to model flow through global variables. + * - `PostUpdateNodeImpl`, which is used to model the state of an object after + * an update after a number of loads. + * - `SsaPhiNode`, which represents phi nodes as computed by the shared SSA + * library. + */ +cached +newtype TIRDataFlowNode = + TNode1(Node1Impl node) { DataFlowImplCommon::forceCachingInSameStage() } or + TGlobalLikeVariableNode(GlobalLikeVariable var, int indirectionIndex) { + indirectionIndex = + [getMinIndirectionsForType(var.getUnspecifiedType()) .. Ssa::getMaxIndirectionsForType(var.getUnspecifiedType())] + } or + TSsaIteratorNode(IteratorFlow::IteratorFlowNode n) or + TBodyLessParameterNodeImpl(Parameter p, int indirectionIndex) { + // Rule out parameters of catch blocks. + not exists(p.getCatchBlock()) and + // We subtract one because `getMaxIndirectionsForType` returns the maximum + // indirection for a glvalue of a given type, and this doesn't apply to + // parameters. + indirectionIndex = [0 .. 
Ssa::getMaxIndirectionsForType(p.getUnspecifiedType()) - 1] and + not any(InitializeParameterInstruction init).getParameter() = p + } or + TAliasedPhiNode(AliasedPhiNodeImpl n) or + TFlowSummaryNode(FlowSummaryImpl::Private::SummaryNode sn) diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll index c169567e8b5e..176403d56946 100644 --- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll +++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll @@ -12,6 +12,7 @@ private import ModelUtil private import semmle.code.cpp.models.interfaces.FunctionInputsAndOutputs as IO private import semmle.code.cpp.models.interfaces.DataFlow as DF private import semmle.code.cpp.dataflow.ExternalFlow as External +private import DataFlowNodes cached private module Cached { @@ -36,6 +37,28 @@ private module Cached { TSingleUseOperandNode0(Operand op) { not Ssa::ignoreOperand(op) and exists(Ssa::getIRRepresentationOfOperand(op)) } + + cached + newtype TIRDataFlowNode1 = + TNode0(Node0Impl node) or + TRawIndirectOperand0(Node0Impl node, int indirectionIndex) { + Ssa::hasRawIndirectOperand(node.asOperand(), indirectionIndex) + } or + TRawIndirectInstruction0(Node0Impl node, int indirectionIndex) { + not exists(node.asOperand()) and + Ssa::hasRawIndirectInstruction(node.asInstruction(), indirectionIndex) + } or + TFinalParameterNode(Ssa::FinalParameterUse use) or + TFinalGlobalValue(Ssa::GlobalUse use) or + TInitialGlobalValue(Ssa::GlobalDef def) or + TSsaPhiInputNode(Ssa::PhiNode phi, IRBlock input) { phi.hasInputFromBlock(_, _, _, _, input) } or + TSsaPhiNode(Ssa::PhiNode phi) or + TPostUpdateNodeImpl(Operand operand, int indirectionIndex) { + operand = any(FieldAddress fa).getObjectAddressOperand() and + indirectionIndex = [0 .. 
Ssa::countIndirectionsForCppType(Ssa::getLanguageType(operand))] + or + Ssa::isModifiableByCall(operand, indirectionIndex) + } } /** @@ -61,36 +84,48 @@ private module Cached { } } +/** + * An operand that is defined by a `FieldAddressInstruction`. + */ +class FieldAddress extends Operand { + FieldAddressInstruction fai; + + FieldAddress() { fai = this.getDef() and not Ssa::ignoreOperand(this) } + + /** Gets the field associated with this instruction. */ + Field getField() { result = fai.getField() } + + /** Gets the instruction whose result provides the address of the object containing the field. */ + Instruction getObjectAddress() { result = fai.getObjectAddress() } + + /** Gets the operand that provides the address of the object containing the field. */ + Operand getObjectAddressOperand() { result = fai.getObjectAddressOperand() } +} + import Cached private import Nodes0 -/** - * A module for calculating the number of stars (i.e., `*`s) needed for various - * dataflow node `toString` predicates. - */ -module NodeStars { - private int getNumberOfIndirections(Node n) { - result = n.(RawIndirectOperand).getIndirectionIndex() - or - result = n.(RawIndirectInstruction).getIndirectionIndex() - or - result = n.(VariableNode).getIndirectionIndex() - or - result = n.(PostUpdateNodeImpl).getIndirectionIndex() - or - result = n.(FinalParameterNode).getIndirectionIndex() - or - result = n.(BodyLessParameterNodeImpl).getIndirectionIndex() - } +class RawIndirectOperand extends IndirectOperand { + RawIndirectOperand() { node.isRaw() } - /** - * Gets the number of stars (i.e., `*`s) needed to produce the `toString` - * output for `n`. 
- */ - string stars(Node n) { result = repeatStars(getNumberOfIndirections(n)) } + int getIndirectionIndex() { exists(Operand op | node.hasOperandAndIndirectionIndex(op, result)) } } -import NodeStars +class RawIndirectInstruction extends IndirectInstruction { + RawIndirectInstruction() { node.isRaw() } + + int getIndirectionIndex() { node.hasInstructionAndIndirectionIndex(_, result) } +} + +int getNumberOfIndirections0(Node1Impl n) { + n = TRawIndirectOperand0(_, result) + or + n = TRawIndirectInstruction0(_, result) + or + result = n.(FinalParameterNode0).getIndirectionIndex() +} + +private string stars0(Node1Impl n) { result = repeatStars(getNumberOfIndirections0(n)) } /** * A cut-down `DataFlow::Node` class that does not depend on the output of SSA. @@ -138,6 +173,12 @@ class Node0Impl extends TIRDataFlowNode0 { /** Holds if the value of this node is a glvalue */ predicate isGLValue() { none() } // overridden in subclasses + + final predicate hasIndexInBlock(IRBlock block, int i) { + this.asInstruction() = block.getInstruction(i) + or + this.asOperand().getUse() = block.getInstruction(i) + } } /** @@ -247,6 +288,157 @@ private class SingleUseOperandNode0 extends OperandNode0, TSingleUseOperandNode0 SingleUseOperandNode0() { this = TSingleUseOperandNode0(op) } } +class Node1Impl extends TIRDataFlowNode1 { + /** + * INTERNAL: Do not use. + */ + Declaration getEnclosingCallable() { none() } // overridden in subclasses + + /** Gets the function to which this node belongs, if any. */ + Declaration getFunction() { none() } // overridden in subclasses + + /** + * Gets the type of this node. + * + * If `isGLValue()` holds, then the type of this node + * should be thought of as "pointer to `getType()`". + */ + DataFlowType getType() { + none() // overridden in subclasses + } + + /** Gets the location of this node. */ + final Location getLocation() { result = this.getLocationImpl() } + + /** INTERNAL: Do not use. 
*/ + Location getLocationImpl() { + none() // overridden by subclasses + } + + /** INTERNAL: Do not use. */ + string toStringImpl() { + none() // overridden by subclasses + } + + Instruction asInstruction() { + none() // overridden by subclasses + } + + Operand asOperand() { + none() // overridden by subclasses + } + + /** Gets a textual representation of this node. */ + final string toString() { result = this.toStringImpl() } + + /** Holds if the value of this node is a glvalue */ + predicate isGLValue() { none() } // overridden in subclasses + + predicate hasIndexInBlock(IRBlock block, int i) { + none() // overridden in subclasses + } +} + +class Node0 extends Node1Impl, TNode0 { + Node0Impl node; + + Node0() { this = TNode0(node) } + + override Declaration getEnclosingCallable() { result = node.getEnclosingCallable() } + + override Declaration getFunction() { result = node.getFunction() } + + override DataFlowType getType() { result = node.getType() } + + override Location getLocationImpl() { result = node.getLocation() } + + override string toStringImpl() { result = node.toString() } + + override Instruction asInstruction() { result = node.asInstruction() } + + override Operand asOperand() { result = node.asOperand() } + + override predicate isGLValue() { node.isGLValue() } + + override predicate hasIndexInBlock(IRBlock block, int i) { node.hasIndexInBlock(block, i) } +} + +class OperandNode1 extends Node1Impl, Node0 { + override OperandNode0 node; + + Operand getOperand() { result = node.getOperand() } +} + +class InstructionNode1 extends Node1Impl, Node0 { + override InstructionNode0 node; + + Instruction getInstruction() { result = node.getInstruction() } +} + +Node0 operandNode0(Operand op) { result.asOperand() = op } + +Node0 instructionNode0(Instruction i) { result.asInstruction() = i } + +private class RawIndirectOperand0 extends Node1Impl, TRawIndirectOperand0 { + Node0Impl node; + int indirectionIndex; + + RawIndirectOperand0() { this = 
TRawIndirectOperand0(node, indirectionIndex) } + + override Declaration getEnclosingCallable() { result = node.getEnclosingCallable() } + + override Declaration getFunction() { result = node.getFunction() } + + override predicate isGLValue() { node.asOperand().isGLValue() } + + override DataFlowType getType() { + exists(int sub, DataFlowType type, boolean isGLValue | + type = getOperandType(node.asOperand(), isGLValue) and + if isGLValue = true then sub = 1 else sub = 0 + | + result = getTypeImpl(type.getUnderlyingType(), indirectionIndex - sub) + ) + } + + override Location getLocationImpl() { result = node.getLocation() } + + override string toStringImpl() { + result = stars0(this) + operandNode0(node.asOperand()).toStringImpl() + } + + override predicate hasIndexInBlock(IRBlock block, int i) { node.hasIndexInBlock(block, i) } +} + +private class RawIndirectInstruction0 extends Node1Impl, TRawIndirectInstruction0 { + Node0Impl node; + int indirectionIndex; + + RawIndirectInstruction0() { this = TRawIndirectInstruction0(node, indirectionIndex) } + + override Declaration getEnclosingCallable() { result = node.getEnclosingCallable() } + + override Declaration getFunction() { result = node.getFunction() } + + override predicate isGLValue() { node.asInstruction().isGLValue() } + + override DataFlowType getType() { + exists(int sub, DataFlowType type, boolean isGLValue | + type = getInstructionType(node.asInstruction(), isGLValue) and + if isGLValue = true then sub = 1 else sub = 0 + | + result = getTypeImpl(type.getUnderlyingType(), indirectionIndex - sub) + ) + } + + override Location getLocationImpl() { result = node.getLocation() } + + override string toStringImpl() { + result = stars0(this) + instructionNode0(node.asInstruction()).toStringImpl() + } + + override predicate hasIndexInBlock(IRBlock block, int i) { node.hasIndexInBlock(block, i) } +} + private module IndirectOperands { /** * INTERNAL: Do not use. 
@@ -257,19 +449,50 @@ private module IndirectOperands { * Note: Unlike `RawIndirectOperand`, a value of type `IndirectOperand` may * be an `OperandNode`. */ - abstract class IndirectOperand extends Node { + abstract class IndirectOperand1 extends Node1Impl { /** Gets the underlying operand and the underlying indirection index. */ abstract predicate hasOperandAndIndirectionIndex(Operand operand, int indirectionIndex); + + abstract predicate isRaw(); } - private class IndirectOperandFromRaw extends IndirectOperand instanceof RawIndirectOperand { + private class RawIndirectOperand1 extends Node1Impl { + int indirectionIndex; + Operand operand; + + RawIndirectOperand1() { + exists(Node0Impl node | operand = node.asOperand() | + this = TRawIndirectOperand0(node, indirectionIndex) + or + this = TRawIndirectInstruction0(node, indirectionIndex) + ) + } + + /** Gets the operand associated with this node. */ + Operand getOperand() { result = operand } + + /** Gets the underlying indirection index. 
*/ + int getIndirectionIndex() { result = indirectionIndex } + + override predicate hasIndexInBlock(IRBlock block, int i) { + exists(Node0Impl node | operand = node.asOperand() and node.hasIndexInBlock(block, i) | + this = TRawIndirectOperand0(node, indirectionIndex) + or + this = TRawIndirectInstruction0(node, indirectionIndex) + ) + } + } + + private class IndirectOperandFromRaw extends IndirectOperand1 instanceof RawIndirectOperand1 { override predicate hasOperandAndIndirectionIndex(Operand operand, int indirectionIndex) { - operand = RawIndirectOperand.super.getOperand() and - indirectionIndex = RawIndirectOperand.super.getIndirectionIndex() + operand = RawIndirectOperand1.super.getOperand() and + indirectionIndex = RawIndirectOperand1.super.getIndirectionIndex() } + + final override predicate isRaw() { any() } } - private class IndirectOperandFromIRRepr extends IndirectOperand { + private class IndirectOperandFromIRRepr extends IndirectOperand1 { Operand operand; int indirectionIndex; @@ -277,13 +500,15 @@ private module IndirectOperands { exists(Operand repr, int indirectionIndexRepr | Ssa::hasIRRepresentationOfIndirectOperand(operand, indirectionIndex, repr, indirectionIndexRepr) and - nodeHasOperand(this, repr, indirectionIndexRepr) + nodeHasOperand1(this, repr, indirectionIndexRepr) ) } override predicate hasOperandAndIndirectionIndex(Operand op, int index) { op = operand and index = indirectionIndex } + + final override predicate isRaw() { none() } } } @@ -299,20 +524,57 @@ private module IndirectInstructions { * Note: Unlike `RawIndirectInstruction`, a value of type `IndirectInstruction` may * be an `InstructionNode`. */ - abstract class IndirectInstruction extends Node { + abstract class IndirectInstruction1 extends Node1Impl { /** Gets the underlying operand and the underlying indirection index. */ abstract predicate hasInstructionAndIndirectionIndex(Instruction instr, int index); + + abstract predicate isRaw(); + } + + /** + * INTERNAL: Do not use. 
+ * + * A node that represents the indirect value of an instruction in the IR + * after a number of loads. + */ + private class RawIndirectInstruction1 extends Node1Impl { + int indirectionIndex; + Instruction instr; + + RawIndirectInstruction1() { + exists(Node0Impl node | instr = node.asInstruction() | + this = TRawIndirectOperand0(node, indirectionIndex) + or + this = TRawIndirectInstruction0(node, indirectionIndex) + ) + } + + /** Gets the instruction associated with this node. */ + Instruction getInstruction() { result = instr } + + /** Gets the underlying indirection index. */ + int getIndirectionIndex() { result = indirectionIndex } + + override predicate hasIndexInBlock(IRBlock block, int i) { + exists(Node0Impl node | instr = node.asInstruction() and node.hasIndexInBlock(block, i) | + this = TRawIndirectOperand0(node, indirectionIndex) + or + this = TRawIndirectInstruction0(node, indirectionIndex) + ) + } } - private class IndirectInstructionFromRaw extends IndirectInstruction instanceof RawIndirectInstruction + private class IndirectInstructionFromRaw extends IndirectInstruction1 instanceof RawIndirectInstruction1 { override predicate hasInstructionAndIndirectionIndex(Instruction instr, int index) { - instr = RawIndirectInstruction.super.getInstruction() and - index = RawIndirectInstruction.super.getIndirectionIndex() + instr = RawIndirectInstruction1.super.getInstruction() and + index = RawIndirectInstruction1.super.getIndirectionIndex() } + + final override predicate isRaw() { any() } } - private class IndirectInstructionFromIRRepr extends IndirectInstruction { + private class IndirectInstructionFromIRRepr extends IndirectInstruction1 { Instruction instr; int indirectionIndex; @@ -320,18 +582,550 @@ private module IndirectInstructions { exists(Instruction repr, int indirectionIndexRepr | Ssa::hasIRRepresentationOfIndirectInstruction(instr, indirectionIndex, repr, indirectionIndexRepr) and - nodeHasInstruction(this, repr, indirectionIndexRepr) + 
nodeHasInstruction1(this, repr, indirectionIndexRepr) ) } override predicate hasInstructionAndIndirectionIndex(Instruction i, int index) { i = instr and index = indirectionIndex } + + final override predicate isRaw() { none() } } } import IndirectInstructions +/** + * INTERNAL: do not use. + * + * A node representing the value of an update parameter + * just before reaching the end of a function. + */ +class FinalParameterNode0 extends Node1Impl, TFinalParameterNode { + Ssa::FinalParameterUse use; + + FinalParameterNode0() { this = TFinalParameterNode(use) } + + /** Gets the parameter associated with this final use. */ + Parameter getParameter() { result = use.getParameter() } + + Ssa::FinalParameterUse getUse() { result = use } + + /** Gets the underlying indirection index. */ + int getIndirectionIndex() { result = use.getIndirectionIndex() } + + /** Gets the argument index associated with this final use. */ + final int getArgumentIndex() { result = this.getParameter().getIndex() } + + override Declaration getFunction() { result = use.getParameter().getFunction() } + + override Declaration getEnclosingCallable() { result = this.getFunction() } + + override DataFlowType getType() { result = use.getType() } + + final override Location getLocationImpl() { result = use.getLocation() } + + override string toStringImpl() { result = stars0(this) + this.getParameter().toString() } +} + +/** + * INTERNAL: do not use. + * + * A node representing the value of a global variable just before returning + * from a function body. 
+ */ +class FinalGlobalValue0 extends Node1Impl, TFinalGlobalValue { + Ssa::GlobalUse use; + + FinalGlobalValue0() { this = TFinalGlobalValue(use) } + + override Declaration getEnclosingCallable() { result = this.getFunction() } + + override Declaration getFunction() { result = use.getIRFunction().getFunction() } + + override DataFlowType getType() { + result = getTypeImpl(use.getUnderlyingType(), use.getIndirectionIndex() - 1) + } + + final override Location getLocationImpl() { result = use.getLocation() } + + override string toStringImpl() { result = use.toString() } + + Ssa::GlobalUse getGlobalUse() { result = use } +} + +/** + * INTERNAL: do not use. + * + * A node representing the value of a global variable just after entering + * a function body. + */ +class InitialGlobalValue0 extends Node1Impl, TInitialGlobalValue { + Ssa::GlobalDef def; + + InitialGlobalValue0() { this = TInitialGlobalValue(def) } + + override Declaration getEnclosingCallable() { result = this.getFunction() } + + override Declaration getFunction() { result = def.getFunction() } + + Ssa::GlobalDef getDef() { result = def } + + final override predicate isGLValue() { def.getIndirectionIndex() = 0 } + + override DataFlowType getType() { + exists(DataFlowType type | + type = def.getUnderlyingType() and + if this.isGLValue() + then result = type + else result = getTypeImpl(type, def.getIndirectionIndex() - 1) + ) + } + + final override Location getLocationImpl() { result = def.getLocation() } + + override string toStringImpl() { result = def.toString() } +} + +/** + * INTERNAL: do not use. + * + * A phi node produced by the shared SSA library, viewed as a node in a data flow graph. + */ +class SsaPhiNode0 extends Node1Impl, TSsaPhiNode { + Ssa::PhiNode phi; + + SsaPhiNode0() { this = TSsaPhiNode(phi) } + + /** Gets the phi node associated with this node. 
*/ + Ssa::PhiNode getPhiNode() { result = phi } + + override Declaration getEnclosingCallable() { result = this.getFunction() } + + override Declaration getFunction() { result = phi.getBasicBlock().getEnclosingFunction() } + + override DataFlowType getType() { + exists(Ssa::SourceVariable sv | + this.getPhiNode().definesAt(sv, _, _, _) and + result = sv.getType() + ) + } + + /** Gets the basic block to which this phi node belongs. */ + IRBlock getBasicBlock() { result = phi.getBasicBlock() } + + override predicate isGLValue() { phi.getSourceVariable().isGLValue() } + + final override Location getLocationImpl() { result = phi.getBasicBlock().getLocation() } + + override string toStringImpl() { result = phi.toString() } + + /** + * Gets a node that is used as input to this phi node. + * `fromBackEdge` is true if data flows along a back-edge, + * and `false` otherwise. + */ + cached + final Node getAnInput(boolean fromBackEdge) { + result.(SsaPhiInputNode).getPhiNode() = phi and + exists(IRBlock bPhi, IRBlock bResult | + bPhi = phi.getBasicBlock() and bResult = result.getBasicBlock() + | + if bPhi.dominates(bResult) then fromBackEdge = true else fromBackEdge = false + ) + } + + /** Gets a node that is used as input to this phi node. */ + final Node getAnInput() { result = this.getAnInput(_) } + + /** Gets the source variable underlying this phi node. */ + Ssa::SourceVariable getSourceVariable() { result = phi.getSourceVariable() } + + /** + * Holds if this phi node is a phi-read node. + * + * Phi-read nodes are like normal phi nodes, but they are inserted based + * on reads instead of writes. + */ + predicate isPhiRead() { phi.isPhiRead() } +} + +/** + * INTERNAL: Do not use. + * + * A node that is used as an input to a phi node. + * + * This class exists to allow more powerful barrier guards. 
Consider this + * example: + * + * ```cpp + * int x = source(); + * if(!safe(x)) { + * x = clear(); + * } + * // phi node for x here + * sink(x); + * ``` + * + * At the phi node for `x` it is neither the case that `x` is dominated by + * `safe(x)`, nor is it the case that the phi is dominated by a clearing of `x`. + * + * By inserting a "phi input" node as the last entry in the basic block that + * defines the inputs to the phi we can conclude that each of those inputs is + * safe to pass to `sink`. + */ +class SsaPhiInputNode0 extends Node1Impl, TSsaPhiInputNode { + Ssa::PhiNode phi; + IRBlock block; + + SsaPhiInputNode0() { this = TSsaPhiInputNode(phi, block) } + + /** Gets the phi node associated with this node. */ + Ssa::PhiNode getPhiNode() { result = phi } + + /** Gets the basic block in which this input originates. */ + IRBlock getBlock() { result = block } + + override Declaration getEnclosingCallable() { result = this.getFunction() } + + override Declaration getFunction() { result = phi.getBasicBlock().getEnclosingFunction() } + + override DataFlowType getType() { result = this.getSourceVariable().getType() } + + override predicate isGLValue() { phi.getSourceVariable().isGLValue() } + + final override Location getLocationImpl() { result = block.getLastInstruction().getLocation() } + + override string toStringImpl() { result = "Phi input" } + + /** Gets the source variable underlying this phi node. */ + Ssa::SourceVariable getSourceVariable() { result = phi.getSourceVariable() } +} + +/** + * INTERNAL: Do not use.
+ */ +class PostUpdateNodeImpl0 extends PostUpdateNode0, TPostUpdateNodeImpl { + int indirectionIndex; + Operand operand; + + PostUpdateNodeImpl0() { this = TPostUpdateNodeImpl(operand, indirectionIndex) } + + override Declaration getFunction() { result = operand.getUse().getEnclosingFunction() } + + override Declaration getEnclosingCallable() { + result = this.getPreUpdateNode().getEnclosingCallable() + } + + override DataFlowType getType() { result = this.getPreUpdateNode().getType() } + + /** Gets the operand associated with this node. */ + Operand getOperand() { result = operand } + + /** Gets the indirection index associated with this node. */ + int getIndirectionIndex() { result = indirectionIndex } + + override Location getLocationImpl() { result = operand.getLocation() } + + final override Node1Impl getPreUpdateNode() { + indirectionIndex > 0 and + hasOperandAndIndex1(result, operand, indirectionIndex) + or + indirectionIndex = 0 and + result.asOperand() = operand + } +} + +/** + * INTERNAL: do not use. + * + * A node representing the indirection of a value after it + * has been returned from a function. + */ +class IndirectArgumentOutNode0 extends PostUpdateNodeImpl0 { + override ArgumentOperand operand; + + int getArgumentIndex() { + exists(CallInstruction call | call.getArgumentOperand(result) = operand) + } + + Operand getAddressOperand() { result = operand } + + CallInstruction getCallInstruction() { result.getAnArgumentOperand() = operand } + + /** + * Gets the `Function` that the call targets, if this is statically known. + */ + Function getStaticCallTarget() { result = this.getCallInstruction().getStaticCallTarget() } + + override string toStringImpl() { + exists(string prefix | + if this.getIndirectionIndex() > 0 then prefix = "" else prefix = "pointer to " + | + // This string should be unique enough to be helpful but common enough to + // avoid storing too many different strings. 
+ result = prefix + this.getStaticCallTarget().getName() + " output argument" + or + not exists(this.getStaticCallTarget()) and + result = prefix + "output argument" + ) + } +} + +/** + * Holds if `node` is an indirect operand with columns `(operand, indirectionIndex)`, and + * `operand` represents a use of the fully converted value of `call`. + */ +private predicate hasOperand( + Node1Impl node, CallInstruction call, int indirectionIndex, Operand operand +) { + operandForFullyConvertedCall(operand, call) and + hasOperandAndIndex1(node, operand, indirectionIndex) +} + +/** + * Holds if `node` is an indirect instruction with columns `(instr, indirectionIndex)`, and + * `instr` represents a use of the fully converted value of `call`. + * + * Note that `hasOperand(node, _, _, _)` implies `not hasInstruction(node, _, _, _)`. + */ +private predicate hasInstruction( + Node1Impl node, CallInstruction call, int indirectionIndex, Instruction instr +) { + instructionForFullyConvertedCall(instr, call) and + hasInstructionAndIndex1(node, instr, indirectionIndex) +} + +class IndirectReturnOutNode0 extends Node1Impl { + CallInstruction call; + int indirectionIndex; + + IndirectReturnOutNode0() { + // Annoyingly, we need to pick the fully converted value as the output of the function to + // make flow through in the shared dataflow library work correctly. + hasOperand(this, call, indirectionIndex, _) + or + hasInstruction(this, call, indirectionIndex, _) + } + + CallInstruction getCallInstruction() { result = call } + + int getIndirectionIndex() { result = indirectionIndex } + + /** Gets the operand associated with this node, if any. */ + Operand getOperand() { hasOperand(this, call, indirectionIndex, result) } + + /** Gets the instruction associated with this node, if any. */ + Instruction getInstruction() { hasInstruction(this, call, indirectionIndex, result) } +} + +/** + * The value of an uninitialized local variable, viewed as a node in a data + * flow graph. 
+ */ +class UninitializedNode0 extends Node1Impl { + Cpp::LocalVariable v; + + UninitializedNode0() { + exists(Ssa::DefinitionExt def, Ssa::SourceVariable sv | + def.getIndirectionIndex() = 0 and + def.getValue().asInstruction() instanceof UninitializedInstruction and + Ssa::defToNode(this, def, sv, _, _, _) and + v = sv.getBaseVariable().(Ssa::BaseIRVariable).getIRVariable().getAst() + ) + } + + /** Gets the uninitialized local variable corresponding to this node. */ + Cpp::LocalVariable getLocalVariable() { result = v } +} + +abstract class PostUpdateNode0 extends Node1Impl { + /** + * Gets the node before the state update. + */ + abstract Node1Impl getPreUpdateNode(); + + override DataFlowType getType() { result = this.getPreUpdateNode().getType() } +} + +predicate simpleInstructionLocalFlowStep(Operand opFrom, Instruction iTo) { + // Treat all conversions as flow, even conversions between different numeric types. + conversionFlow(opFrom, iTo, false, _) + or + iTo.(CopyInstruction).getSourceValueOperand() = opFrom +} + +predicate simpleOperandLocalFlowStep(Instruction iFrom, Operand opTo) { + not opTo instanceof MemoryOperand and + opTo.getDef() = iFrom +} + +private predicate indirectionOperandFlow(RawIndirectOperand0 nodeFrom, Node1Impl nodeTo) { + nodeFrom != nodeTo and + ( + // Reduce the indirection count by 1 if we're passing through a `LoadInstruction`. + exists(int ind, LoadInstruction load | + hasOperandAndIndex1(nodeFrom, load.getSourceAddressOperand(), ind) and + nodeHasInstruction1(nodeTo, load, ind - 1) + ) + or + // If an operand flows to an instruction, then the indirection of + // the operand also flows to the indirection of the instruction. 
+ exists(Operand operand, Instruction instr, int indirectionIndex | + simpleInstructionLocalFlowStep(operand, instr) and + hasOperandAndIndex1(nodeFrom, operand, pragma[only_bind_into](indirectionIndex)) and + hasInstructionAndIndex1(nodeTo, instr, pragma[only_bind_into](indirectionIndex)) + ) + or + // If there's indirect flow to an operand, then there's also indirect + // flow to the operand after applying some pointer arithmetic. + exists(PointerArithmeticInstruction pointerArith, int indirectionIndex | + hasOperandAndIndex1(nodeFrom, pointerArith.getAnOperand(), + pragma[only_bind_into](indirectionIndex)) and + hasInstructionAndIndex1(nodeTo, pointerArith, pragma[only_bind_into](indirectionIndex)) + ) + ) +} + +/** + * Holds if `operand.getDef() = instr`, but there exists a `StoreInstruction` that + * writes to an address that is equivalent to the value computed by `instr` in + * between `instr` and `operand`, and therefore there should not be flow from `*instr` + * to `*operand`. + */ +pragma[nomagic] +private predicate isStoredToBetween(Instruction instr, Operand operand) { + simpleOperandLocalFlowStep(pragma[only_bind_into](instr), pragma[only_bind_into](operand)) and + exists(StoreInstruction store, IRBlock block, int storeIndex, int instrIndex, int operandIndex | + store.getDestinationAddress() = instr and + block.getInstruction(storeIndex) = store and + block.getInstruction(instrIndex) = instr and + block.getInstruction(operandIndex) = operand.getUse() and + instrIndex < storeIndex and + storeIndex < operandIndex + ) +} + +private predicate indirectionInstructionFlow( + RawIndirectInstruction0 nodeFrom, IndirectOperand1 nodeTo +) { + nodeFrom != nodeTo and + // If there's flow from an instruction to an operand, then there's also flow from the + // indirect instruction to the indirect operand. 
+ exists(Operand operand, Instruction instr, int indirectionIndex | + simpleOperandLocalFlowStep(pragma[only_bind_into](instr), pragma[only_bind_into](operand)) + | + hasOperandAndIndex1(nodeTo, operand, pragma[only_bind_into](indirectionIndex)) and + hasInstructionAndIndex1(nodeFrom, instr, pragma[only_bind_into](indirectionIndex)) and + not isStoredToBetween(instr, operand) + ) +} + +private predicate modelFlow(Node1Impl nodeFrom, Node1Impl nodeTo, string model) { + exists( + CallInstruction call, DF::DataFlowFunction func, IO::FunctionInput modelIn, + IO::FunctionOutput modelOut + | + call.getStaticCallTarget() = func and + func.hasDataFlow(modelIn, modelOut) and + model = "DataFlowFunction" + | + nodeFrom = callInput0(call, modelIn) and + nodeTo = callOutput0(call, modelOut) + or + exists(int d | + nodeFrom = callInput0(call, modelIn, d) and + nodeTo = callOutput0(call, modelOut, d) + ) + ) +} + +private predicate reverseFlow(Node1Impl nodeFrom, Node1Impl nodeTo) { + reverseFlowOperand(nodeFrom, nodeTo) + or + reverseFlowInstruction(nodeFrom, nodeTo) +} + +pragma[noinline] +predicate outNodeHasAddressAndIndex( + IndirectArgumentOutNode0 out, Operand address, int indirectionIndex +) { + out.getAddressOperand() = address and + out.getIndirectionIndex() = indirectionIndex +} + +private predicate reverseFlowOperand(Node1Impl nodeFrom, IndirectReturnOutNode0 nodeTo) { + exists(Operand address, int indirectionIndex | + nodeHasOperand1(nodeTo, address, indirectionIndex) + | + exists(StoreInstruction store | + nodeHasInstruction1(nodeFrom, store, indirectionIndex - 1) and + store.getDestinationAddressOperand() = address + ) + or + // We also want a write coming out of an `OutNode` to flow to `nodeTo`. + // This is different from `reverseFlowInstruction` since `nodeFrom` can never + // be an `OutNode` when it's defined by an instruction.
+ outNodeHasAddressAndIndex(nodeFrom, address, indirectionIndex) + ) +} + +private predicate reverseFlowInstruction(Node1Impl nodeFrom, IndirectReturnOutNode0 nodeTo) { + exists(Instruction address, int indirectionIndex | + nodeHasInstruction1(nodeTo, address, indirectionIndex) + | + exists(StoreInstruction store | + nodeHasInstruction1(nodeFrom, store, indirectionIndex - 1) and + store.getDestinationAddress() = address + ) + ) +} + +predicate simpleLocalFlowStep1(Node1Impl nodeFrom, Node1Impl nodeTo, string model) { + ( + // Post update node -> Node flow + Ssa::postUpdateFlow(nodeFrom, nodeTo) + or + // Def-use/Use-use flow + Ssa::ssaFlow(nodeFrom, nodeTo) + or + // Phi input -> Phi + nodeFrom.(SsaPhiInputNode0).getPhiNode() = nodeTo.(SsaPhiNode0).getPhiNode() + or + // Operand -> Instruction flow + simpleInstructionLocalFlowStep(nodeFrom.asOperand(), nodeTo.asInstruction()) + or + // Instruction -> Operand flow + exists(Instruction iFrom, Operand opTo | + iFrom = nodeFrom.asInstruction() and opTo = nodeTo.asOperand() + | + simpleOperandLocalFlowStep(iFrom, opTo) and + // Omit when the instruction node also represents the operand. + not iFrom = Ssa::getIRRepresentationOfOperand(opTo) + ) + or + // Phi node -> Node flow + Ssa::fromPhiNode(nodeFrom, nodeTo) + or + // Indirect operand -> (indirect) instruction flow + indirectionOperandFlow(nodeFrom, nodeTo) + or + // Indirect instruction -> indirect operand flow + indirectionInstructionFlow(nodeFrom, nodeTo) + ) and + model = "" + or + // Flow through modeled functions + modelFlow(nodeFrom, nodeTo, model) + or + // Reverse flow: data that flows from the definition node back into the indirection returned + // by a function. This allows data to flow 'in' through references returned by a modeled + // function such as `operator[]`. + reverseFlow(nodeFrom, nodeTo) and + model = "" +} + /** Gets the callable in which this node occurs. 
*/ DataFlowCallable nodeGetEnclosingCallable(Node n) { result = n.getEnclosingCallable() } @@ -673,12 +1467,20 @@ predicate instructionForFullyConvertedCall(Instruction instr, CallInstruction ca } /** Holds if `node` represents the output node for `call`. */ -predicate simpleOutNode(Node node, CallInstruction call) { +predicate simpleOutNode1(Node1Impl node, CallInstruction call) { operandForFullyConvertedCall(node.asOperand(), call) or instructionForFullyConvertedCall(node.asInstruction(), call) } +/** Holds if `node` represents the output node for `call`. */ +predicate simpleOutNode(Node node, CallInstruction call) { + exists(Node1Impl n | + node = TNode1(n) and + simpleOutNode1(n, call) + ) +} + /** * A data flow node that represents the output of a call (for example, a * return value) at the call site. @@ -802,7 +1604,7 @@ predicate jumpStep(Node n1, Node n2) { or exists(Ssa::GlobalDef globalDef | v = globalDef.getVariable() and - n2.(InitialGlobalValue).getGlobalDef() = globalDef + n2.(InitialGlobalValue).getDef() = globalDef | globalDef.getIndirection() = getMinIndirectionForGlobalDef(globalDef) and v = n1.asVariable() @@ -898,6 +1700,34 @@ private predicate numberOfLoadsFromOperand( certain = true } +predicate hasOperandAndIndex1( + IndirectOperand1 indirectOperand, Operand operand, int indirectionIndex +) { + indirectOperand.hasOperandAndIndirectionIndex(operand, indirectionIndex) +} + +predicate hasInstructionAndIndex1( + IndirectInstruction1 indirectInstr, Instruction instr, int indirectionIndex +) { + indirectInstr.hasInstructionAndIndirectionIndex(instr, indirectionIndex) +} + +// Needed to join on both an operand and an index at the same time. +pragma[noinline] +predicate nodeHasOperand1(Node1Impl node, Operand operand, int indirectionIndex) { + node.asOperand() = operand and indirectionIndex = 0 + or + hasOperandAndIndex1(node, operand, indirectionIndex) +} + +// Needed to join on both an instruction and an index at the same time. 
+pragma[noinline] +predicate nodeHasInstruction1(Node1Impl node, Instruction instr, int indirectionIndex) { + node.asInstruction() = instr and indirectionIndex = 0 + or + hasInstructionAndIndex1(node, instr, indirectionIndex) +} + // Needed to join on both an operand and an index at the same time. pragma[noinline] predicate nodeHasOperand(Node node, Operand operand, int indirectionIndex) { @@ -1633,6 +2463,8 @@ private Instruction getAnInstruction(Node n) { or result = n.(SsaPhiNode).getPhiNode().getBasicBlock().getFirstInstruction() or + result = n.(AliasedPhiNode).getPhi().getPhi().getBasicBlock().getFirstInstruction() + or result = n.(SsaPhiInputNode).getBasicBlock().getFirstInstruction() or n.(IndirectInstruction).hasInstructionAndIndirectionIndex(result, _) @@ -1986,7 +2818,7 @@ module IteratorFlow { or exists(Ssa::UseImpl use | use.hasIndexInBlock(bb, i, sv) and - nodeTo = use.getNode() + nodeTo = TNode1(use.getNode()) ) } diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll index c9e2a7136216..8fcd1fddaa7a 100644 --- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll +++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll @@ -19,80 +19,8 @@ private import codeql.util.Unit private import Node0ToString private import DataFlowDispatch as DataFlowDispatch import ExprNodes - -/** - * The IR dataflow graph consists of the following nodes: - * - `Node0`, which injects most instructions and operands directly into the - * dataflow graph. - * - `VariableNode`, which is used to model flow through global variables. - * - `PostUpdateNodeImpl`, which is used to model the state of an object after - * an update after a number of loads. - * - `SsaPhiNode`, which represents phi nodes as computed by the shared SSA - * library. - * - `RawIndirectOperand`, which represents the value of `operand` after - * loading the address a number of times. 
- * - `RawIndirectInstruction`, which represents the value of `instr` after - * loading the address a number of times. - */ -cached -private newtype TIRDataFlowNode = - TNode0(Node0Impl node) { DataFlowImplCommon::forceCachingInSameStage() } or - TGlobalLikeVariableNode(GlobalLikeVariable var, int indirectionIndex) { - indirectionIndex = - [getMinIndirectionsForType(var.getUnspecifiedType()) .. Ssa::getMaxIndirectionsForType(var.getUnspecifiedType())] - } or - TPostUpdateNodeImpl(Operand operand, int indirectionIndex) { - operand = any(FieldAddress fa).getObjectAddressOperand() and - indirectionIndex = [0 .. Ssa::countIndirectionsForCppType(Ssa::getLanguageType(operand))] - or - Ssa::isModifiableByCall(operand, indirectionIndex) - } or - TSsaPhiInputNode(Ssa::PhiNode phi, IRBlock input) { phi.hasInputFromBlock(_, _, _, _, input) } or - TSsaPhiNode(Ssa::PhiNode phi) or - TSsaIteratorNode(IteratorFlow::IteratorFlowNode n) or - TRawIndirectOperand0(Node0Impl node, int indirectionIndex) { - Ssa::hasRawIndirectOperand(node.asOperand(), indirectionIndex) - } or - TRawIndirectInstruction0(Node0Impl node, int indirectionIndex) { - not exists(node.asOperand()) and - Ssa::hasRawIndirectInstruction(node.asInstruction(), indirectionIndex) - } or - TFinalParameterNode(Parameter p, int indirectionIndex) { - exists(Ssa::FinalParameterUse use | - use.getParameter() = p and - use.getIndirectionIndex() = indirectionIndex - ) - } or - TFinalGlobalValue(Ssa::GlobalUse globalUse) or - TInitialGlobalValue(Ssa::GlobalDef globalUse) or - TBodyLessParameterNodeImpl(Parameter p, int indirectionIndex) { - // Rule out parameters of catch blocks. - not exists(p.getCatchBlock()) and - // We subtract one because `getMaxIndirectionsForType` returns the maximum - // indirection for a glvalue of a given type, and this doesn't apply to - // parameters. - indirectionIndex = [0 .. 
Ssa::getMaxIndirectionsForType(p.getUnspecifiedType()) - 1] and - not any(InitializeParameterInstruction init).getParameter() = p - } or - TFlowSummaryNode(FlowSummaryImpl::Private::SummaryNode sn) - -/** - * An operand that is defined by a `FieldAddressInstruction`. - */ -class FieldAddress extends Operand { - FieldAddressInstruction fai; - - FieldAddress() { fai = this.getDef() and not Ssa::ignoreOperand(this) } - - /** Gets the field associated with this instruction. */ - Field getField() { result = fai.getField() } - - /** Gets the instruction whose result provides the address of the object containing the field. */ - Instruction getObjectAddress() { result = fai.getObjectAddress() } - - /** Gets the operand that provides the address of the object containing the field. */ - Operand getObjectAddressOperand() { result = fai.getObjectAddressOperand() } -} +private import DataFlowNodes +private import AliasedFlow as Aliased /** * Holds if `opFrom` is an operand whose value flows to the result of `instrTo`. @@ -180,18 +108,17 @@ class Node extends TIRDataFlowNode { * Note: Phi nodes are considered to be at index `-1`. */ final predicate hasIndexInBlock(IRBlock block, int i) { - this.asInstruction() = block.getInstruction(i) - or - this.asOperand().getUse() = block.getInstruction(i) - or this.(SsaPhiNode).getPhiNode().getBasicBlock() = block and i = -1 or this.(SsaPhiInputNode).getBlock() = block and i = block.getInstructionCount() or - this.(RawIndirectOperand).getOperand().getUse() = block.getInstruction(i) + this.(AliasedPhiNode).getPhi().getPhi().getBasicBlock() = block and i = -1 or - this.(RawIndirectInstruction).getInstruction() = block.getInstruction(i) + exists(Node1Impl n | + this = TNode1(n) and + n.hasIndexInBlock(block, i) + ) or this.(PostUpdateNode).getPreUpdateNode().hasIndexInBlock(block, i) } @@ -503,10 +430,10 @@ class Node extends TIRDataFlowNode { /** * A class that lifts pre-SSA dataflow nodes to regular dataflow nodes. 
*/ -private class Node0 extends Node, TNode0 { - Node0Impl node; +private class Node1 extends Node, TNode1 { + Node1Impl node; - Node0() { this = TNode0(node) } + Node1() { this = TNode1(node) } override DataFlowCallable getEnclosingCallable() { result.asSourceCallable() = node.getEnclosingCallable() @@ -526,8 +453,8 @@ private class Node0 extends Node, TNode0 { /** * An instruction, viewed as a node in a data flow graph. */ -class InstructionNode extends Node0 { - override InstructionNode0 node; +class InstructionNode extends Node1 { + override InstructionNode1 node; Instruction instr; InstructionNode() { instr = node.getInstruction() } @@ -539,8 +466,8 @@ class InstructionNode extends Node0 { /** * An operand, viewed as a node in a data flow graph. */ -class OperandNode extends Node, Node0 { - override OperandNode0 node; +class OperandNode extends Node, Node1 { + override OperandNode1 node; Operand op; OperandNode() { op = node.getOperand() } @@ -549,6 +476,28 @@ class OperandNode extends Node, Node0 { Operand getOperand() { result = op } } +class IndirectOperand extends Node, Node1 { + override IndirectOperand1 node; + + predicate hasOperandAndIndirectionIndex(Operand operand, int indirectionIndex) { + node.hasOperandAndIndirectionIndex(operand, indirectionIndex) + } + + /** INTERNAL: Do not use. */ + predicate isRaw() { node.isRaw() } +} + +class IndirectInstruction extends Node, Node1 { + override IndirectInstruction1 node; + + predicate hasInstructionAndIndirectionIndex(Instruction instr, int indirectionIndex) { + node.hasInstructionAndIndirectionIndex(instr, indirectionIndex) + } + + /** INTERNAL: Do not use. */ + predicate isRaw() { node.isRaw() } +} + /** * INTERNAL: Do not use. * @@ -564,36 +513,20 @@ Type stripPointer(Type t) { result = t.(FunctionPointerIshType).getBaseType() } -/** - * INTERNAL: Do not use. 
- */ -class PostUpdateNodeImpl extends PartialDefinitionNode, TPostUpdateNodeImpl { - int indirectionIndex; +class PostUpdateNodeImpl extends Node1, PartialDefinitionNode { + override PostUpdateNodeImpl0 node; Operand operand; - PostUpdateNodeImpl() { this = TPostUpdateNodeImpl(operand, indirectionIndex) } + PostUpdateNodeImpl() { operand = node.getOperand() } - override Declaration getFunction() { result = operand.getUse().getEnclosingFunction() } + final override Node getPreUpdateNode() { result = TNode1(node.getPreUpdateNode()) } - override DataFlowCallable getEnclosingCallable() { - result = this.getPreUpdateNode().getEnclosingCallable() - } + override DataFlowType getType() { result = node.getType() } /** Gets the operand associated with this node. */ Operand getOperand() { result = operand } - /** Gets the indirection index associated with this node. */ - override int getIndirectionIndex() { result = indirectionIndex } - - override Location getLocationImpl() { result = operand.getLocation() } - - final override Node getPreUpdateNode() { - indirectionIndex > 0 and - hasOperandAndIndex(result, operand, indirectionIndex) - or - indirectionIndex = 0 and - result.asOperand() = operand - } + override int getIndirectionIndex() { result = node.getIndirectionIndex() } final override Expr getDefinedExpr() { result = operand.getDef().getUnconvertedResultExpression() @@ -617,120 +550,44 @@ class PostFieldUpdateNode extends PostUpdateNodeImpl { override string toStringImpl() { result = this.getPreUpdateNode() + " [post update]" } } -/** - * INTERNAL: do not use. - * - * A phi node produced by the shared SSA library, viewed as a node in a data flow graph. - */ -class SsaPhiNode extends Node, TSsaPhiNode { - Ssa::PhiNode phi; +class SsaPhiNode extends Node1 { + override SsaPhiNode0 node; - SsaPhiNode() { this = TSsaPhiNode(phi) } + predicate isPhiRead() { node.isPhiRead() } /** Gets the phi node associated with this node. 
*/ - Ssa::PhiNode getPhiNode() { result = phi } - - override DataFlowCallable getEnclosingCallable() { - result.asSourceCallable() = this.getFunction() - } + Ssa::PhiNode getPhiNode() { result = node.getPhiNode() } - override Declaration getFunction() { result = phi.getBasicBlock().getEnclosingFunction() } - - override DataFlowType getType() { - exists(Ssa::SourceVariable sv | - this.getPhiNode().definesAt(sv, _, _, _) and - result = sv.getType() - ) - } - - override predicate isGLValue() { phi.getSourceVariable().isGLValue() } - - final override Location getLocationImpl() { result = phi.getBasicBlock().getLocation() } - - override string toStringImpl() { result = phi.toString() } + /** Gets the source variable underlying this phi node. */ + Ssa::SourceVariable getSourceVariable() { result = node.getSourceVariable() } /** * Gets a node that is used as input to this phi node. * `fromBackEdge` is true if data flows along a back-edge, * and `false` otherwise. */ - cached - final Node getAnInput(boolean fromBackEdge) { - result.(SsaPhiInputNode).getPhiNode() = phi and - exists(IRBlock bPhi, IRBlock bResult | - bPhi = phi.getBasicBlock() and bResult = result.getBasicBlock() - | - if bPhi.dominates(bResult) then fromBackEdge = true else fromBackEdge = false - ) - } + final Node getAnInput(boolean fromBackEdge) { result = node.getAnInput(fromBackEdge) } /** Gets a node that is used as input to this phi node. */ - final Node getAnInput() { result = this.getAnInput(_) } - - /** Gets the source variable underlying this phi node. */ - Ssa::SourceVariable getSourceVariable() { result = phi.getSourceVariable() } - - /** - * Holds if this phi node is a phi-read node. - * - * Phi-read nodes are like normal phi nodes, but they are inserted based - * on reads instead of writes. - */ - predicate isPhiRead() { phi.isPhiRead() } + final Node getAnInput() { result = node.getAnInput() } } -/** - * INTERNAL: Do not use. - * - * A node that is used as an input to a phi node. 
- * - * This class exists to allow more powerful barrier guards. Consider this - * example: - * - * ```cpp - * int x = source(); - * if(!safe(x)) { - * x = clear(); - * } - * // phi node for x here - * sink(x); - * ``` - * - * At the phi node for `x` it is neither the case that `x` is dominated by - * `safe(x)`, or is the case that the phi is dominated by a clearing of `x`. - * - * By inserting a "phi input" node as the last entry in the basic block that - * defines the inputs to the phi we can conclude that each of those inputs are - * safe to pass to `sink`. - */ -class SsaPhiInputNode extends Node, TSsaPhiInputNode { - Ssa::PhiNode phi; - IRBlock block; - - SsaPhiInputNode() { this = TSsaPhiInputNode(phi, block) } +class SsaPhiInputNode extends Node1 { + override SsaPhiInputNode0 node; /** Gets the phi node associated with this node. */ - Ssa::PhiNode getPhiNode() { result = phi } + Ssa::PhiNode getPhiNode() { result = node.getPhiNode() } /** Gets the basic block in which this input originates. */ - IRBlock getBlock() { result = block } - - override DataFlowCallable getEnclosingCallable() { - result.asSourceCallable() = this.getFunction() - } - - override Declaration getFunction() { result = phi.getBasicBlock().getEnclosingFunction() } - - override DataFlowType getType() { result = this.getSourceVariable().getType() } - - override predicate isGLValue() { phi.getSourceVariable().isGLValue() } - - final override Location getLocationImpl() { result = block.getLastInstruction().getLocation() } - - override string toStringImpl() { result = "Phi input" } + IRBlock getBlock() { result = node.getBlock() } /** Gets the source variable underlying this phi node. 
*/ - Ssa::SourceVariable getSourceVariable() { result = phi.getSourceVariable() } + Ssa::SourceVariable getSourceVariable() { result = node.getSourceVariable() } +} + +private SsaPhiInputNode ssaPhiInputNode(Ssa::PhiNode phi, IRBlock block) { + result.getPhiNode() = phi and + result.getBlock() = block } /** @@ -792,65 +649,16 @@ class SideEffectOperandNode extends Node instanceof IndirectOperand { Expr getArgument() { result = call.getArgument(argumentIndex).getUnconvertedResultExpression() } } -/** - * INTERNAL: do not use. - * - * A node representing the value of a global variable just before returning - * from a function body. - */ -class FinalGlobalValue extends Node, TFinalGlobalValue { - Ssa::GlobalUse globalUse; - - FinalGlobalValue() { this = TFinalGlobalValue(globalUse) } - - /** Gets the underlying SSA use. */ - Ssa::GlobalUse getGlobalUse() { result = globalUse } - - override DataFlowCallable getEnclosingCallable() { - result.asSourceCallable() = this.getFunction() - } - - override Declaration getFunction() { result = globalUse.getIRFunction().getFunction() } - - override DataFlowType getType() { - exists(int indirectionIndex | - indirectionIndex = globalUse.getIndirectionIndex() and - result = getTypeImpl(globalUse.getUnderlyingType(), indirectionIndex - 1) - ) - } - - final override Location getLocationImpl() { result = globalUse.getLocation() } +class FinalGlobalValue extends Node, Node1 { + override FinalGlobalValue0 node; - override string toStringImpl() { result = globalUse.toString() } + Ssa::GlobalUse getGlobalUse() { result = node.getGlobalUse() } } -/** - * INTERNAL: do not use. - * - * A node representing the value of a global variable just after entering - * a function body. - */ -class InitialGlobalValue extends Node, TInitialGlobalValue { - Ssa::GlobalDef globalDef; - - InitialGlobalValue() { this = TInitialGlobalValue(globalDef) } - - /** Gets the underlying SSA definition. 
*/ - Ssa::GlobalDef getGlobalDef() { result = globalDef } +class InitialGlobalValue extends Node, Node1 { + override InitialGlobalValue0 node; - override DataFlowCallable getEnclosingCallable() { - result.asSourceCallable() = this.getFunction() - } - - override Declaration getFunction() { result = globalDef.getFunction() } - - final override predicate isGLValue() { globalDef.getIndirectionIndex() = 0 } - - override DataFlowType getType() { result = globalDef.getUnderlyingType() } - - final override Location getLocationImpl() { result = globalDef.getLocation() } - - override string toStringImpl() { result = globalDef.toString() } + Ssa::GlobalDef getDef() { result = node.getDef() } } /** @@ -889,6 +697,26 @@ class BodyLessParameterNodeImpl extends Node, TBodyLessParameterNodeImpl { } } +class AliasedPhiNode extends Node, TAliasedPhiNode { + Aliased::AliasedPhiNodeImpl phi; + + AliasedPhiNode() { this = TAliasedPhiNode(phi) } + + Aliased::AliasedPhiNodeImpl getPhi() { result = phi } + + override DataFlowCallable getEnclosingCallable() { + result.asSourceCallable() = this.getFunction() + } + + override Declaration getFunction() { result = phi.getFunction() } + + override DataFlowType getType() { result = phi.getPhi().getSourceVariable().getType() } + + final override Location getLocationImpl() { result = phi.getLocation() } + + final override string toStringImpl() { result = phi.toString() } +} + /** * A data-flow node used to model flow summaries. That is, a dataflow node * that is synthesized to represent a parameter, return value, or other part @@ -966,53 +794,20 @@ class IndirectReturnNode extends Node { * has been returned from a function. 
*/ class IndirectArgumentOutNode extends PostUpdateNodeImpl { - override ArgumentOperand operand; + override IndirectArgumentOutNode0 node; - int getArgumentIndex() { - exists(CallInstruction call | call.getArgumentOperand(result) = operand) - } + int getArgumentIndex() { result = node.getArgumentIndex() } - Operand getAddressOperand() { result = operand } + Operand getAddressOperand() { result = node.getAddressOperand() } - CallInstruction getCallInstruction() { result.getAnArgumentOperand() = operand } + CallInstruction getCallInstruction() { result = node.getCallInstruction() } /** * Gets the `Function` that the call targets, if this is statically known. */ - Function getStaticCallTarget() { result = this.getCallInstruction().getStaticCallTarget() } - - override string toStringImpl() { - exists(string prefix | if indirectionIndex > 0 then prefix = "" else prefix = "pointer to " | - // This string should be unique enough to be helpful but common enough to - // avoid storing too many different strings. - result = prefix + this.getStaticCallTarget().getName() + " output argument" - or - not exists(this.getStaticCallTarget()) and - result = prefix + "output argument" - ) - } -} + Function getStaticCallTarget() { result = node.getStaticCallTarget() } -/** - * Holds if `node` is an indirect operand with columns `(operand, indirectionIndex)`, and - * `operand` represents a use of the fully converted value of `call`. - */ -private predicate hasOperand(Node node, CallInstruction call, int indirectionIndex, Operand operand) { - operandForFullyConvertedCall(operand, call) and - hasOperandAndIndex(node, operand, indirectionIndex) -} - -/** - * Holds if `node` is an indirect instruction with columns `(instr, indirectionIndex)`, and - * `instr` represents a use of the fully converted value of `call`. - * - * Note that `hasOperand(node, _, _, _)` implies `not hasInstruction(node, _, _, _)`. 
- */ -private predicate hasInstruction( - Node node, CallInstruction call, int indirectionIndex, Instruction instr -) { - instructionForFullyConvertedCall(instr, call) and - hasInstructionAndIndex(node, instr, indirectionIndex) + override string toStringImpl() { result = node.toString() } } /** @@ -1021,27 +816,18 @@ private predicate hasInstruction( * A node representing the indirect value of a function call (i.e., a value hidden * behind a number of indirections). */ -class IndirectReturnOutNode extends Node { - CallInstruction call; - int indirectionIndex; +class IndirectReturnOutNode extends Node1 { + override IndirectReturnOutNode0 node; - IndirectReturnOutNode() { - // Annoyingly, we need to pick the fully converted value as the output of the function to - // make flow through in the shared dataflow library work correctly. - hasOperand(this, call, indirectionIndex, _) - or - hasInstruction(this, call, indirectionIndex, _) - } + CallInstruction getCallInstruction() { result = node.getCallInstruction() } - CallInstruction getCallInstruction() { result = call } - - int getIndirectionIndex() { result = indirectionIndex } + int getIndirectionIndex() { result = node.getIndirectionIndex() } /** Gets the operand associated with this node, if any. */ - Operand getOperand() { hasOperand(this, call, indirectionIndex, result) } + Operand getOperand() { result = node.getOperand() } /** Gets the instruction associated with this node, if any. 
*/ - Instruction getInstruction() { hasInstruction(this, call, indirectionIndex, result) } + Instruction getInstruction() { result = node.getInstruction() } } /** @@ -1065,6 +851,8 @@ private class PostIndirectReturnOutNode extends IndirectReturnOutNode, PostUpdat } override Node getPreUpdateNode() { result = this } + + override DataFlowType getType() { result = node.getType() } } /** @@ -1110,211 +898,36 @@ Type getTypeImpl(Type t, int indirectionIndex) { result instanceof UnknownType } -private module RawIndirectNodes { - /** - * INTERNAL: Do not use. - * - * A node that represents the indirect value of an operand in the IR - * after `index` number of loads. - */ - private class RawIndirectOperand0 extends Node, TRawIndirectOperand0 { - Node0Impl node; - int indirectionIndex; - - RawIndirectOperand0() { this = TRawIndirectOperand0(node, indirectionIndex) } - - /** Gets the underlying instruction. */ - Operand getOperand() { result = node.asOperand() } - - /** Gets the underlying indirection index. */ - int getIndirectionIndex() { result = indirectionIndex } - - override Declaration getFunction() { result = node.getFunction() } - - override DataFlowCallable getEnclosingCallable() { - result.asSourceCallable() = node.getEnclosingCallable() - } - - override predicate isGLValue() { this.getOperand().isGLValue() } - - override DataFlowType getType() { - exists(int sub, DataFlowType type, boolean isGLValue | - type = getOperandType(this.getOperand(), isGLValue) and - if isGLValue = true then sub = 1 else sub = 0 - | - result = getTypeImpl(type.getUnderlyingType(), indirectionIndex - sub) - ) - } - - final override Location getLocationImpl() { - if exists(this.getOperand().getLocation()) - then result = this.getOperand().getLocation() - else result instanceof UnknownDefaultLocation - } - - override string toStringImpl() { - result = stars(this) + operandNode(this.getOperand()).toStringImpl() - } - } - - /** - * INTERNAL: Do not use. 
- * - * A node that represents the indirect value of an instruction in the IR - * after `index` number of loads. - */ - private class RawIndirectInstruction0 extends Node, TRawIndirectInstruction0 { - Node0Impl node; - int indirectionIndex; - - RawIndirectInstruction0() { this = TRawIndirectInstruction0(node, indirectionIndex) } - - /** Gets the underlying instruction. */ - Instruction getInstruction() { result = node.asInstruction() } - - /** Gets the underlying indirection index. */ - int getIndirectionIndex() { result = indirectionIndex } - - override Declaration getFunction() { result = node.getFunction() } - - override DataFlowCallable getEnclosingCallable() { - result.asSourceCallable() = node.getEnclosingCallable() - } - - override predicate isGLValue() { this.getInstruction().isGLValue() } - - override DataFlowType getType() { - exists(int sub, DataFlowType type, boolean isGLValue | - type = getInstructionType(this.getInstruction(), isGLValue) and - if isGLValue = true then sub = 1 else sub = 0 - | - result = getTypeImpl(type.getUnderlyingType(), indirectionIndex - sub) - ) - } - - final override Location getLocationImpl() { - if exists(this.getInstruction().getLocation()) - then result = this.getInstruction().getLocation() - else result instanceof UnknownDefaultLocation - } - - override string toStringImpl() { - result = stars(this) + instructionNode(this.getInstruction()).toStringImpl() - } - } - - /** - * INTERNAL: Do not use. - * - * A node that represents the indirect value of an operand in the IR - * after a number of loads. - */ - class RawIndirectOperand extends Node { - int indirectionIndex; - Operand operand; - - RawIndirectOperand() { - exists(Node0Impl node | operand = node.asOperand() | - this = TRawIndirectOperand0(node, indirectionIndex) - or - this = TRawIndirectInstruction0(node, indirectionIndex) - ) - } - - /** Gets the operand associated with this node. 
*/ - Operand getOperand() { result = operand } - - /** Gets the underlying indirection index. */ - int getIndirectionIndex() { result = indirectionIndex } - } - - /** - * INTERNAL: Do not use. - * - * A node that represents the indirect value of an instruction in the IR - * after a number of loads. - */ - class RawIndirectInstruction extends Node { - int indirectionIndex; - Instruction instr; - - RawIndirectInstruction() { - exists(Node0Impl node | instr = node.asInstruction() | - this = TRawIndirectOperand0(node, indirectionIndex) - or - this = TRawIndirectInstruction0(node, indirectionIndex) - ) - } - - /** Gets the instruction associated with this node. */ - Instruction getInstruction() { result = instr } - - /** Gets the underlying indirection index. */ - int getIndirectionIndex() { result = indirectionIndex } - } -} - -import RawIndirectNodes - /** * INTERNAL: do not use. * * A node representing the value of an update parameter * just before reaching the end of a function. */ -class FinalParameterNode extends Node, TFinalParameterNode { - Parameter p; - int indirectionIndex; - - FinalParameterNode() { this = TFinalParameterNode(p, indirectionIndex) } +class FinalParameterNode extends Node, Node1 { + override FinalParameterNode0 node; /** Gets the parameter associated with this final use. */ - Parameter getParameter() { result = p } + Parameter getParameter() { result = node.getParameter() } /** Gets the underlying indirection index. */ - int getIndirectionIndex() { result = indirectionIndex } + int getIndirectionIndex() { result = node.getIndirectionIndex() } /** Gets the argument index associated with this final use. 
*/ - final int getArgumentIndex() { result = p.getIndex() } - - override Declaration getFunction() { result = p.getFunction() } - - override DataFlowCallable getEnclosingCallable() { - result.asSourceCallable() = this.getFunction() - } - - override DataFlowType getType() { result = getTypeImpl(p.getUnderlyingType(), indirectionIndex) } + final int getArgumentIndex() { result = node.getArgumentIndex() } - final override Location getLocationImpl() { - // Parameters can have multiple locations. When there's a unique location we use - // that one, but if multiple locations exist we default to an unknown location. - result = unique( | | p.getLocation()) - or - not exists(unique( | | p.getLocation())) and - result instanceof UnknownDefaultLocation - } - - override string toStringImpl() { result = stars(this) + p.toString() } + Ssa::FinalParameterUse getUse() { result = node.getUse() } } /** * The value of an uninitialized local variable, viewed as a node in a data * flow graph. */ -class UninitializedNode extends Node { - LocalVariable v; - - UninitializedNode() { - exists(Ssa::DefinitionExt def, Ssa::SourceVariable sv | - def.getIndirectionIndex() = 0 and - def.getValue().asInstruction() instanceof UninitializedInstruction and - Ssa::defToNode(this, def, sv, _, _, _) and - v = sv.getBaseVariable().(Ssa::BaseIRVariable).getIRVariable().getAst() - ) - } +class UninitializedNode extends Node1 { + override UninitializedNode0 node; /** Gets the uninitialized local variable corresponding to this node. 
*/ - LocalVariable getLocalVariable() { result = v } + LocalVariable getLocalVariable() { result = node.getLocalVariable() } } abstract private class AbstractParameterNode extends Node { @@ -1559,7 +1172,7 @@ abstract class PostUpdateNode extends Node { */ abstract Node getPreUpdateNode(); - final override DataFlowType getType() { result = this.getPreUpdateNode().getType() } + override DataFlowType getType() { result = this.getPreUpdateNode().getType() } } /** @@ -1750,67 +1363,6 @@ private module Cached { FlowSummaryImpl::Private::Steps::summaryThroughStepValue(nodeFrom, nodeTo, _) } - private predicate indirectionOperandFlow(RawIndirectOperand nodeFrom, Node nodeTo) { - nodeFrom != nodeTo and - ( - // Reduce the indirection count by 1 if we're passing through a `LoadInstruction`. - exists(int ind, LoadInstruction load | - hasOperandAndIndex(nodeFrom, load.getSourceAddressOperand(), ind) and - nodeHasInstruction(nodeTo, load, ind - 1) - ) - or - // If an operand flows to an instruction, then the indirection of - // the operand also flows to the indirection of the instruction. - exists(Operand operand, Instruction instr, int indirectionIndex | - simpleInstructionLocalFlowStep(operand, instr) and - hasOperandAndIndex(nodeFrom, operand, pragma[only_bind_into](indirectionIndex)) and - hasInstructionAndIndex(nodeTo, instr, pragma[only_bind_into](indirectionIndex)) - ) - or - // If there's indirect flow to an operand, then there's also indirect - // flow to the operand after applying some pointer arithmetic. 
- exists(PointerArithmeticInstruction pointerArith, int indirectionIndex | - hasOperandAndIndex(nodeFrom, pointerArith.getAnOperand(), - pragma[only_bind_into](indirectionIndex)) and - hasInstructionAndIndex(nodeTo, pointerArith, pragma[only_bind_into](indirectionIndex)) - ) - ) - } - - /** - * Holds if `operand.getDef() = instr`, but there exists a `StoreInstruction` that - * writes to an address that is equivalent to the value computed by `instr` in - * between `instr` and `operand`, and therefore there should not be flow from `*instr` - * to `*operand`. - */ - pragma[nomagic] - private predicate isStoredToBetween(Instruction instr, Operand operand) { - simpleOperandLocalFlowStep(pragma[only_bind_into](instr), pragma[only_bind_into](operand)) and - exists(StoreInstruction store, IRBlock block, int storeIndex, int instrIndex, int operandIndex | - store.getDestinationAddress() = instr and - block.getInstruction(storeIndex) = store and - block.getInstruction(instrIndex) = instr and - block.getInstruction(operandIndex) = operand.getUse() and - instrIndex < storeIndex and - storeIndex < operandIndex - ) - } - - private predicate indirectionInstructionFlow( - RawIndirectInstruction nodeFrom, IndirectOperand nodeTo - ) { - nodeFrom != nodeTo and - // If there's flow from an instruction to an operand, then there's also flow from the - // indirect instruction to the indirect operand. - exists(Operand operand, Instruction instr, int indirectionIndex | - simpleOperandLocalFlowStep(pragma[only_bind_into](instr), pragma[only_bind_into](operand)) - | - hasOperandAndIndex(nodeTo, operand, pragma[only_bind_into](indirectionIndex)) and - hasInstructionAndIndex(nodeFrom, instr, pragma[only_bind_into](indirectionIndex)) and - not isStoredToBetween(instr, operand) - ) - } - /** * INTERNAL: do not use. 
* @@ -1820,117 +1372,29 @@ private module Cached { */ cached predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo, string model) { + model = "" and ( - // Post update node -> Node flow - Ssa::postUpdateFlow(nodeFrom, nodeTo) - or - // Def-use/Use-use flow - Ssa::ssaFlow(nodeFrom, nodeTo) - or - // Phi input -> Phi - nodeFrom.(SsaPhiInputNode).getPhiNode() = nodeTo.(SsaPhiNode).getPhiNode() - or IteratorFlow::localFlowStep(nodeFrom, nodeTo) or - // Operand -> Instruction flow - simpleInstructionLocalFlowStep(nodeFrom.asOperand(), nodeTo.asInstruction()) + Aliased::into(nodeFrom, nodeTo.(AliasedPhiNode).getPhi()) or - // Instruction -> Operand flow - exists(Instruction iFrom, Operand opTo | - iFrom = nodeFrom.asInstruction() and opTo = nodeTo.asOperand() - | - simpleOperandLocalFlowStep(iFrom, opTo) and - // Omit when the instruction node also represents the operand. - not iFrom = Ssa::getIRRepresentationOfOperand(opTo) - ) + Aliased::step1(nodeFrom, nodeTo) or - // Phi node -> Node flow - Ssa::fromPhiNode(nodeFrom, nodeTo) + Aliased::step2(nodeFrom.(AliasedPhiNode).getPhi(), nodeTo.(AliasedPhiNode).getPhi()) or - // Indirect operand -> (indirect) instruction flow - indirectionOperandFlow(nodeFrom, nodeTo) - or - // Indirect instruction -> indirect operand flow - indirectionInstructionFlow(nodeFrom, nodeTo) - ) and - model = "" - or - // Flow through modeled functions - modelFlow(nodeFrom, nodeTo, model) + Aliased::out(nodeFrom.(AliasedPhiNode).getPhi(), nodeTo) + ) or - // Reverse flow: data that flows from the definition node back into the indirection returned - // by a function. This allows data to flow 'in' through references returned by a modeled - // function such as `operator[]`. 
- reverseFlow(nodeFrom, nodeTo) and - model = "" + exists(Node1Impl nFrom, Node1Impl nTo | + nodeFrom = TNode1(nFrom) and + nodeTo = TNode1(nTo) and + simpleLocalFlowStep1(nFrom, nTo, model) + ) or // models-as-data summarized flow FlowSummaryImpl::Private::Steps::summaryLocalStep(nodeFrom.(FlowSummaryNode).getSummaryNode(), nodeTo.(FlowSummaryNode).getSummaryNode(), true, model) } - - private predicate simpleInstructionLocalFlowStep(Operand opFrom, Instruction iTo) { - // Treat all conversions as flow, even conversions between different numeric types. - conversionFlow(opFrom, iTo, false, _) - or - iTo.(CopyInstruction).getSourceValueOperand() = opFrom - } - - private predicate simpleOperandLocalFlowStep(Instruction iFrom, Operand opTo) { - not opTo instanceof MemoryOperand and - opTo.getDef() = iFrom - } - - private predicate modelFlow(Node nodeFrom, Node nodeTo, string model) { - exists( - CallInstruction call, DataFlowFunction func, FunctionInput modelIn, FunctionOutput modelOut - | - call.getStaticCallTarget() = func and - func.hasDataFlow(modelIn, modelOut) and - model = "DataFlowFunction" - | - nodeFrom = callInput(call, modelIn) and - nodeTo = callOutput(call, modelOut) - or - exists(int d | - nodeFrom = callInput(call, modelIn, d) and - nodeTo = callOutput(call, modelOut, d) - ) - ) - } - - private predicate reverseFlow(Node nodeFrom, Node nodeTo) { - reverseFlowOperand(nodeFrom, nodeTo) - or - reverseFlowInstruction(nodeFrom, nodeTo) - } - - private predicate reverseFlowOperand(Node nodeFrom, IndirectReturnOutNode nodeTo) { - exists(Operand address, int indirectionIndex | - nodeHasOperand(nodeTo, address, indirectionIndex) - | - exists(StoreInstruction store | - nodeHasInstruction(nodeFrom, store, indirectionIndex - 1) and - store.getDestinationAddressOperand() = address - ) - or - // We also want a write coming out of an `OutNode` to flow `nodeTo`. 
- // This is different from `reverseFlowInstruction` since `nodeFrom` can never - // be an `OutNode` when it's defined by an instruction. - Ssa::outNodeHasAddressAndIndex(nodeFrom, address, indirectionIndex) - ) - } - - private predicate reverseFlowInstruction(Node nodeFrom, IndirectReturnOutNode nodeTo) { - exists(Instruction address, int indirectionIndex | - nodeHasInstruction(nodeTo, address, indirectionIndex) - | - exists(StoreInstruction store | - nodeHasInstruction(nodeFrom, store, indirectionIndex - 1) and - store.getDestinationAddress() = address - ) - ) - } } import Cached @@ -2383,7 +1847,7 @@ module BarrierGuard { guardChecks(g, def.getARead().asOperand().getDef().getConvertedResultExpression(), branch) and guardControlsPhiInput(g, branch, def, pragma[only_bind_into](input), pragma[only_bind_into](phi)) and - result = TSsaPhiInputNode(phi, input) + result = ssaPhiInputNode(phi, input) ) } @@ -2483,7 +1947,7 @@ module BarrierGuard { branch) and guardControlsPhiInput(g, branch, def, pragma[only_bind_into](input), pragma[only_bind_into](phi)) and - result = TSsaPhiInputNode(phi, input) + result = ssaPhiInputNode(phi, input) ) } } @@ -2531,7 +1995,7 @@ module InstructionBarrierGuard 0 | +Node1Impl callInput0(CallInstruction call, FunctionInput input, int d) { + exists(Node1Impl n | n = callInput0(call, input) and d > 0 | // An argument or qualifier - hasOperandAndIndex(result, n.asOperand(), d) + hasOperandAndIndex1(result, n.asOperand(), d) or exists(Operand operand, int indirectionIndex | // A value pointed to by an argument or qualifier - hasOperandAndIndex(n, operand, indirectionIndex) and - hasOperandAndIndex(result, operand, indirectionIndex + d) + hasOperandAndIndex1(n, operand, indirectionIndex) and + hasOperandAndIndex1(result, operand, indirectionIndex + d) ) ) } -private IndirectReturnOutNode getIndirectReturnOutNode(CallInstruction call, int d) { - result.getCallInstruction() = call and - result.getIndirectionIndex() = d +DataFlow::Node 
callInput(CallInstruction call, FunctionInput input, int d) { + result = TNode1(callInput0(call, input, d)) } /** * Gets the instruction that holds the `output` for `call`. */ bindingset[d] -Node callOutput(CallInstruction call, FunctionOutput output, int d) { - exists(DataFlow::Node n, int indirectionIndex | - n = callOutputWithIndirectionIndex(call, output, indirectionIndex) and d > 0 +Node1Impl callOutput0(CallInstruction call, FunctionOutput output, int d) { + exists(Node1Impl n, int indirectionIndex | + n = callOutputWithIndirectionIndex0(call, output, indirectionIndex) and d > 0 | // The return value - result = callOutputWithIndirectionIndex(call, output, indirectionIndex + d) + result = callOutputWithIndirectionIndex0(call, output, indirectionIndex + d) or // If there isn't an indirect out node for the call with indirection `d` then // we conflate this with the underlying `CallInstruction`. - not exists(getIndirectReturnOutNode(call, indirectionIndex + d)) and + not exists(getIndirectReturnOutNode0(call, indirectionIndex + d)) and n = result ) } + +/** + * Gets the instruction that holds the `output` for `call`. + */ +bindingset[d] +Node callOutput(CallInstruction call, FunctionOutput output, int d) { + result = TNode1(callOutput0(call, output, d)) +} diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/Node0ToString.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/Node0ToString.qll index 6d1d6c7ab5cf..4ced9adb01ac 100644 --- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/Node0ToString.qll +++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/Node0ToString.qll @@ -11,8 +11,31 @@ private import semmle.code.cpp.ir.IR private import codeql.util.Unit private import DataFlowUtil +private import DataFlowPrivate +private import DataFlowNodes import NormalNode0ToString // Change this import to control which version should be used. 
+private int getNumberOfIndirections(Node n) { + exists(Node1Impl n1 | + n = TNode1(n1) and + result = getNumberOfIndirections0(n1) + ) + or + result = n.(VariableNode).getIndirectionIndex() + or + result = n.(PostUpdateNodeImpl).getIndirectionIndex() + or + result = n.(FinalParameterNode).getIndirectionIndex() + or + result = n.(BodyLessParameterNodeImpl).getIndirectionIndex() +} + +/** + * Gets the number of stars (i.e., `*`s) needed to produce the `toString` + * output for `n`. + */ +string stars(Node n) { result = repeatStars(getNumberOfIndirections(n)) } + /** An abstract class to control the behavior of `Node.toString`. */ abstract class Node0ToString extends Unit { /** diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/SsaInternals.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/SsaInternals.qll index 77e6e8590cef..acd1db9b3e0f 100644 --- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/SsaInternals.qll +++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/SsaInternals.qll @@ -11,6 +11,7 @@ private import semmle.code.cpp.ir.internal.IRCppLanguage private import semmle.code.cpp.ir.dataflow.internal.ModelUtil private import semmle.code.cpp.ir.implementation.raw.internal.TranslatedInitialization private import DataFlowPrivate +private import DataFlowNodes import SsaInternalsCommon private module SourceVariables { @@ -60,6 +61,18 @@ private module SourceVariables { /** Gets the location of this variable. */ Location getLocation() { result = this.getBaseVariable().getLocation() } + + /** + * Gets the SSA variable that represents `k` indirections of this variable. + * Note that this is the identity for `k = 0`. 
+ */ + SourceVariable getIndirectVariable(int k) { + k >= 0 and + exists(BaseSourceVariable bv, int indirection | + sourceVariableHasBaseAndIndex(this, bv, indirection) and + sourceVariableHasBaseAndIndex(result, bv, indirection + k) + ) + } } } @@ -180,6 +193,8 @@ abstract class DefImpl extends TDefImpl { /** Gets a textual representation of this element. */ abstract string toString(); + abstract Node1Impl getNode(); + /** Gets the block of this definition or use. */ final IRBlock getBlock() { this.hasIndexInBlock(result, _) } @@ -245,7 +260,7 @@ abstract class UseImpl extends TUseImpl { UseImpl() { any() } /** Gets the node associated with this use. */ - abstract Node getNode(); + abstract Node1Impl getNode(); /** Gets a textual representation of this element. */ abstract string toString(); @@ -372,6 +387,8 @@ private class DefVariableAddressImpl extends DefAddressImpl { index = 0 ) } + + final override Node1Impl getNode() { none() } } private class DefCallAddressImpl extends DefAddressImpl { @@ -380,6 +397,8 @@ private class DefCallAddressImpl extends DefAddressImpl { final override predicate hasIndexInBlock(IRBlock block, int index) { block.getInstruction(index) = v.getCallInstruction() } + + final override Node1Impl getNode() { none() } } private class DirectDef extends DefImpl, TDirectDefImpl { @@ -410,6 +429,12 @@ private class DirectDef extends DefImpl, TDirectDefImpl { override Node0Impl getValue() { isDef(_, result, address, _, _, _) } override predicate isCertain() { isDef(true, _, address, _, _, indirectionIndex) } + + final override Node1Impl getNode() { + nodeHasOperand1(result, this.getValue().asOperand(), this.getIndirectionIndex()) + or + nodeHasInstruction1(result, this.getValue().asInstruction(), this.getIndirectionIndex()) + } } private class DirectUseImpl extends UseImpl, TDirectUseImpl { @@ -453,15 +478,7 @@ private class DirectUseImpl extends UseImpl, TDirectUseImpl { override predicate isCertain() { isUse(true, operand, _, _, 
indirectionIndex) } - override Node getNode() { nodeHasOperand(result, operand, indirectionIndex) } -} - -pragma[nomagic] -private predicate finalParameterNodeHasParameterAndIndex( - FinalParameterNode n, Parameter p, int indirectionIndex -) { - n.getParameter() = p and - n.getIndirectionIndex() = indirectionIndex + override Node1Impl getNode() { nodeHasOperand1(result, operand, indirectionIndex) } } class FinalParameterUse extends UseImpl, TFinalParameterUse { @@ -475,7 +492,7 @@ class FinalParameterUse extends UseImpl, TFinalParameterUse { int getArgumentIndex() { result = p.getIndex() } - override Node getNode() { finalParameterNodeHasParameterAndIndex(result, p, indirectionIndex) } + override FinalParameterNode0 getNode() { result.getUse() = this } override int getIndirection() { result = indirectionIndex + 1 } @@ -510,6 +527,8 @@ class FinalParameterUse extends UseImpl, TFinalParameterUse { result instanceof UnknownDefaultLocation } + Type getType() { result = getTypeImpl(p.getUnderlyingType(), indirectionIndex) } + override BaseIRVariable getBaseSourceVariable() { result.getIRVariable().getAst() = p } } @@ -572,7 +591,7 @@ class GlobalUse extends UseImpl, TGlobalUse { override string toString() { result = "Use of " + global } - override FinalGlobalValue getNode() { result.getGlobalUse() = this } + override FinalGlobalValue0 getNode() { result.getGlobalUse() = this } override int getIndirection() { isGlobalUse(global, f, result, indirectionIndex) } @@ -667,6 +686,8 @@ class GlobalDefImpl extends DefImpl, TGlobalDefImpl { override string toString() { result = "Def of " + this.getSourceVariable() } override Location getLocation() { result = f.getLocation() } + + final override InitialGlobalValue0 getNode() { getDefImpl(result.getDef()) = this } } /** @@ -677,37 +698,27 @@ predicate adjacentDefRead(IRBlock bb1, int i1, SourceVariable sv, IRBlock bb2, i adjacentDefReadExt(_, sv, bb1, i1, bb2, i2) } -predicate useToNode(IRBlock bb, int i, SourceVariable sv, Node 
nodeTo) { +predicate useToNode(IRBlock bb, int i, SourceVariable sv, Node1Impl nodeTo) { + useToNode0(bb, i, sv, nodeTo) +} + +predicate useToNode0(IRBlock bb, int i, SourceVariable sv, Node1Impl nodeTo) { exists(UseImpl use | use.hasIndexInBlock(bb, i, sv) and nodeTo = use.getNode() ) } -pragma[noinline] -predicate outNodeHasAddressAndIndex( - IndirectArgumentOutNode out, Operand address, int indirectionIndex -) { - out.getAddressOperand() = address and - out.getIndirectionIndex() = indirectionIndex -} - /** * INTERNAL: Do not use. * * Holds if `node` is the node that corresponds to the definition of `def`. */ predicate defToNode( - Node node, DefinitionExt def, SourceVariable sv, IRBlock bb, int i, boolean uncertain + Node1Impl node, DefinitionExt def, SourceVariable sv, IRBlock bb, int i, boolean uncertain ) { - def.definesAt(sv, bb, i, _) and - ( - nodeHasOperand(node, def.getValue().asOperand(), def.getIndirectionIndex()) - or - nodeHasInstruction(node, def.getValue().asInstruction(), def.getIndirectionIndex()) - or - node.(InitialGlobalValue).getGlobalDef() = def - ) and + def.hasIndexInBlock(bb, i, sv) and + node = def.getNode() and if def.isCertain() then uncertain = false else uncertain = true } @@ -719,7 +730,7 @@ predicate defToNode( * * `uncertain` is `true` if this is an uncertain definition. */ -predicate nodeToDefOrUse(Node node, SourceVariable sv, IRBlock bb, int i, boolean uncertain) { +predicate nodeToDefOrUse(Node1Impl node, SourceVariable sv, IRBlock bb, int i, boolean uncertain) { defToNode(node, _, sv, bb, i, uncertain) or // Node -> Use @@ -731,14 +742,14 @@ predicate nodeToDefOrUse(Node node, SourceVariable sv, IRBlock bb, int i, boolea * Perform a single conversion-like step from `nFrom` to `nTo`. This relation * only holds when there is no use-use relation out of `nTo`. 
*/ -private predicate indirectConversionFlowStep(Node nFrom, Node nTo) { +private predicate indirectConversionFlowStep(Node1Impl nFrom, Node1Impl nTo) { not exists(SourceVariable sv, IRBlock bb2, int i2 | useToNode(bb2, i2, sv, nTo) and adjacentDefRead(bb2, i2, sv, _, _) ) and exists(Operand op1, Operand op2, int indirectionIndex, Instruction instr | - hasOperandAndIndex(nFrom, op1, pragma[only_bind_into](indirectionIndex)) and - hasOperandAndIndex(nTo, op2, pragma[only_bind_into](indirectionIndex)) and + hasOperandAndIndex1(nFrom, op1, pragma[only_bind_into](indirectionIndex)) and + hasOperandAndIndex1(nTo, op2, pragma[only_bind_into](indirectionIndex)) and instr = op2.getDef() and conversionFlow(op1, instr, _, _) ) @@ -748,7 +759,15 @@ private predicate indirectConversionFlowStep(Node nFrom, Node nTo) { * Holds if `node` is a phi input node that should receive flow from the * definition to (or use of) `sv` at `(bb1, i1)`. */ -private predicate phiToNode(SsaPhiInputNode node, SourceVariable sv, IRBlock bb1, int i1) { +private predicate phiToNode(SsaPhiInputNode0 node, SourceVariable sv, IRBlock bb1, int i1) { + phiToNode0(node, sv, bb1, i1) +} + +/** + * Holds if `node` is a phi input node that should receive flow from the + * definition to (or use of) `sv` at `(bb1, i1)`. + */ +private predicate phiToNode0(SsaPhiInputNode0 node, SourceVariable sv, IRBlock bb1, int i1) { exists(PhiNode phi, IRBlock input | phi.hasInputFromBlock(_, sv, bb1, i1, input) and node.getPhiNode() = phi and @@ -765,7 +784,7 @@ private predicate phiToNode(SsaPhiInputNode node, SourceVariable sv, IRBlock bb1 * is _not_ guaranteed to overwrite the entire allocation. 
*/ private predicate ssaFlowImpl( - IRBlock bb1, int i1, SourceVariable sv, Node nodeFrom, Node nodeTo, boolean uncertain + IRBlock bb1, int i1, SourceVariable sv, Node1Impl nodeFrom, Node1Impl nodeTo, boolean uncertain ) { nodeToDefOrUse(nodeFrom, sv, bb1, i1, uncertain) and ( @@ -780,7 +799,7 @@ private predicate ssaFlowImpl( } /** Gets a node that represents the prior definition of `node`. */ -private Node getAPriorDefinition(DefinitionExt next) { +private Node1Impl getAPriorDefinition(DefinitionExt next) { exists(IRBlock bb, int i, SourceVariable sv | lastRefRedefExt(_, pragma[only_bind_into](sv), pragma[only_bind_into](bb), pragma[only_bind_into](i), _, next) and @@ -810,14 +829,14 @@ private predicate inOut(FIO::FunctionInput input, FIO::FunctionOutput output) { * first argument of `strcpy`). * - a conversion that flows to such an input. */ -private predicate modeledFlowBarrier(Node n) { +private predicate modeledFlowBarrier(Node1Impl n) { exists( FIO::FunctionInput input, FIO::FunctionOutput output, CallInstruction call, PartialFlow::PartialFlowFunction partialFlowFunc | - n = callInput(call, input) and + n = callInput0(call, input) and inOut(input, output) and - exists(callOutput(call, output)) and + exists(callOutput0(call, output)) and partialFlowFunc = call.getStaticCallTarget() and not partialFlowFunc.isPartialWrite(output) | @@ -826,17 +845,17 @@ private predicate modeledFlowBarrier(Node n) { call.getStaticCallTarget().(Taint::TaintFunction).hasTaintFlow(_, output) ) or - exists(Operand operand, Instruction instr, Node n0, int indirectionIndex | + exists(Operand operand, Instruction instr, Node1Impl n0, int indirectionIndex | modeledFlowBarrier(n0) and - nodeHasInstruction(n0, instr, indirectionIndex) and + nodeHasInstruction1(n0, instr, indirectionIndex) and conversionFlow(operand, instr, false, _) and - nodeHasOperand(n, operand, indirectionIndex) + nodeHasOperand1(n, operand, indirectionIndex) ) } /** Holds if there is def-use or use-use flow from 
`nodeFrom` to `nodeTo`. */ -predicate ssaFlow(Node nodeFrom, Node nodeTo) { - exists(Node nFrom, boolean uncertain, IRBlock bb, int i, SourceVariable sv | +predicate ssaFlow(Node1Impl nodeFrom, Node1Impl nodeTo) { + exists(Node1Impl nFrom, boolean uncertain, IRBlock bb, int i, SourceVariable sv | ssaFlowImpl(bb, i, sv, nFrom, nodeTo, uncertain) and not modeledFlowBarrier(nFrom) and nodeFrom != nodeTo @@ -872,16 +891,16 @@ private predicate isArgumentOfCallableOperand(DataFlowCall call, Operand operand ) } -private predicate isArgumentOfCallable(DataFlowCall call, Node n) { +private predicate isArgumentOfCallable(DataFlowCall call, Node1Impl n) { isArgumentOfCallableOperand(call, n.asOperand()) or exists(Operand op | - n.(IndirectOperand).hasOperandAndIndirectionIndex(op, _) and + n.(IndirectOperand1).hasOperandAndIndirectionIndex(op, _) and isArgumentOfCallableOperand(call, op) ) or exists(Instruction instr | - n.(IndirectInstruction).hasInstructionAndIndirectionIndex(instr, _) and + n.(IndirectInstruction1).hasInstructionAndIndirectionIndex(instr, _) and isArgumentOfCallableInstruction(call, instr) ) } @@ -889,7 +908,7 @@ private predicate isArgumentOfCallable(DataFlowCall call, Node n) { /** * Holds if there is use-use flow from `pun`'s pre-update node to `n`. */ -private predicate postUpdateNodeToFirstUse(PostUpdateNode pun, Node n) { +private predicate postUpdateNodeToFirstUse(PostUpdateNode0 pun, Node1Impl n) { // We cannot mark a `PointerArithmeticInstruction` that computes an offset // based on some SSA // variable `x` as a use of `x` since this creates taint-flow in the @@ -905,7 +924,7 @@ private predicate postUpdateNodeToFirstUse(PostUpdateNode pun, Node n) { // So this predicate recurses back along conversions and `PointerArithmetic` // instructions to find the first use that has provides use-use flow, and // uses that target as the target of the `nodeFrom`. 
- exists(Node adjusted, IRBlock bb1, int i1, SourceVariable sv | + exists(Node1Impl adjusted, IRBlock bb1, int i1, SourceVariable sv | indirectConversionFlowStep*(adjusted, pun.getPreUpdateNode()) and useToNode(bb1, i1, sv, adjusted) | @@ -918,9 +937,9 @@ private predicate postUpdateNodeToFirstUse(PostUpdateNode pun, Node n) { ) } -private predicate stepUntilNotInCall(DataFlowCall call, Node n1, Node n2) { +private predicate stepUntilNotInCall(DataFlowCall call, Node1Impl n1, Node1Impl n2) { isArgumentOfCallable(call, n1) and - exists(Node mid | ssaFlowImpl(_, _, _, n1, mid, _) | + exists(Node1Impl mid | ssaFlowImpl(_, _, _, n1, mid, _) | isArgumentOfCallable(call, mid) and stepUntilNotInCall(call, mid, n2) or @@ -931,7 +950,7 @@ private predicate stepUntilNotInCall(DataFlowCall call, Node n1, Node n2) { bindingset[n1, n2] pragma[inline_late] -private predicate isArgumentOfSameCall(DataFlowCall call, Node n1, Node n2) { +private predicate isArgumentOfSameCall(DataFlowCall call, Node1Impl n1, Node1Impl n2) { isArgumentOfCallable(call, n1) and isArgumentOfCallable(call, n2) } @@ -952,8 +971,8 @@ private predicate isArgumentOfSameCall(DataFlowCall call, Node n1, Node n2) { * similarly we want flow from the second argument of `write_first_argument` to `x` * on the next line. */ -predicate postUpdateFlow(PostUpdateNode pun, Node nodeTo) { - exists(Node preUpdate, Node mid | +predicate postUpdateFlow(PostUpdateNode0 pun, Node1Impl nodeTo) { + exists(Node1Impl preUpdate, Node1Impl mid | preUpdate = pun.getPreUpdateNode() and postUpdateNodeToFirstUse(pun, mid) | @@ -968,7 +987,7 @@ predicate postUpdateFlow(PostUpdateNode pun, Node nodeTo) { } /** Holds if `nodeTo` receives flow from the phi node `nodeFrom`. 
*/ -predicate fromPhiNode(SsaPhiNode nodeFrom, Node nodeTo) { +predicate fromPhiNode(SsaPhiNode0 nodeFrom, Node1Impl nodeTo) { exists(PhiNode phi, SourceVariable sv, IRBlock bb1, int i1 | phi = nodeFrom.getPhiNode() and phi.definesAt(sv, bb1, i1, _) @@ -1183,13 +1202,24 @@ class DefinitionExt extends SsaImpl::DefinitionExt { /** Gets a node that represents a read of this SSA definition. */ pragma[nomagic] - Node getARead() { + Node getARead() { result = TNode1(this.getARead0()) } + + pragma[nomagic] + Node1Impl getARead0() { exists(SourceVariable sv, IRBlock bb, int i | SsaCached::ssaDefReachesReadExt(sv, this, bb, i) | - useToNode(bb, i, sv, result) + useToNode0(bb, i, sv, result) or - phiToNode(result, sv, bb, i) + phiToNode0(result, sv, bb, i) ) } + + /** INTERNAL: Do not use. */ + Node1Impl getNode() { result = getDefImpl(this).getNode() } + + /** Holds if this definition is the `index`'th member in `block`. */ + predicate hasIndexInBlock(IRBlock block, int index, SourceVariable sv) { + getDefImpl(this).hasIndexInBlock(block, index, sv) + } } import SsaCached diff --git a/cpp/ql/lib/semmle/code/cpp/security/InvalidPointerDereference/AllocationToInvalidPointer.qll b/cpp/ql/lib/semmle/code/cpp/security/InvalidPointerDereference/AllocationToInvalidPointer.qll index 83017aec3537..4dedef33883c 100644 --- a/cpp/ql/lib/semmle/code/cpp/security/InvalidPointerDereference/AllocationToInvalidPointer.qll +++ b/cpp/ql/lib/semmle/code/cpp/security/InvalidPointerDereference/AllocationToInvalidPointer.qll @@ -319,7 +319,7 @@ private module Config implements ProductFlow::StateConfigSig { // In the above case, this barrier blocks flow from the indirect node // for `p` to `p[1]`.
exists(Operand operand, PointerAddInstruction add | - node.(IndirectOperand).hasOperandAndIndirectionIndex(operand, _) and + node.(DataFlow::IndirectOperand).hasOperandAndIndirectionIndex(operand, _) and add.getLeftOperand() = operand and add.getRight().(ConstantInstruction).getValue() != "0" ) diff --git a/cpp/ql/src/Likely Bugs/Memory Management/AllocaInLoop.ql b/cpp/ql/src/Likely Bugs/Memory Management/AllocaInLoop.ql index 587a2ecc6ffb..f3f2393b0a0b 100644 --- a/cpp/ql/src/Likely Bugs/Memory Management/AllocaInLoop.ql +++ b/cpp/ql/src/Likely Bugs/Memory Management/AllocaInLoop.ql @@ -15,6 +15,7 @@ import cpp import semmle.code.cpp.rangeanalysis.RangeAnalysisUtils import semmle.code.cpp.ir.dataflow.DataFlow +import semmle.code.cpp.ir.IR /** Gets a loop that contains `e`. */ Loop getAnEnclosingLoopOfExpr(Expr e) { result = getAnEnclosingLoopOfStmt(e.getEnclosingStmt()) } @@ -45,9 +46,15 @@ private Expr getExpr(DataFlow::Node node) { or result = node.asOperand().getUse().getAst() or - result = node.(DataFlow::RawIndirectInstruction).getInstruction().getAst() + exists(Instruction i | + node.(DataFlow::IndirectInstruction).hasInstructionAndIndirectionIndex(i, _) and + result = i.getAst() + ) or - result = node.(DataFlow::RawIndirectOperand).getOperand().getUse().getAst() + exists(Operand op | + node.(DataFlow::IndirectOperand).hasOperandAndIndirectionIndex(op, _) and + result = op.getUse().getAst() + ) } /**