From 048a138505b14ce33e43fde05347cc9d4dc5fd39 Mon Sep 17 00:00:00 2001 From: Benjamin Rodes Date: Wed, 8 Nov 2023 12:20:53 -0500 Subject: [PATCH 01/10] Adding missing strsafe sprintf variants. (cherry picked from commit bdae2af0e2f4bc39fd717b42533681b5f1ef2452) --- .../cpp/models/implementations/Printf.qll | 23 +++++++++++++++---- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/cpp/ql/lib/semmle/code/cpp/models/implementations/Printf.qll b/cpp/ql/lib/semmle/code/cpp/models/implementations/Printf.qll index f0a25dfa30d4..8e4dc32013af 100644 --- a/cpp/ql/lib/semmle/code/cpp/models/implementations/Printf.qll +++ b/cpp/ql/lib/semmle/code/cpp/models/implementations/Printf.qll @@ -147,19 +147,32 @@ private class SnprintfImpl extends Snprintf { /** * The Microsoft `StringCchPrintf` function and variants. + * See: https://learn.microsoft.com/en-us/windows/win32/api/strsafe/ + * and + * https://learn.microsoft.com/en-us/previous-versions/windows/embedded/ms860435(v=msdn.10) */ private class StringCchPrintf extends FormattingFunction { StringCchPrintf() { this instanceof TopLevelFunction and - this.hasGlobalName([ - "StringCchPrintf", "StringCchPrintfEx", "StringCchPrintf_l", "StringCchPrintf_lEx", - "StringCbPrintf", "StringCbPrintfEx", "StringCbPrintf_l", "StringCbPrintf_lEx" - ]) and + exists(string baseName | + baseName in [ + "StringCchPrintf", //StringCchPrintf(pszDest, cchDest, pszFormat, …) + "StringCchPrintfEx", //StringCchPrintfEx(pszDest,cchDest, ppszDestEnd, pcchRemaining, dwFlags, pszFormat, ...); + "StringCchPrintf_l", //StringCchPrintf_l(pszDest, cbDest, pszFormat, locale, …) + "StringCchPrintf_lEx", //StringCchPrintf_lEx(pszDest, cchDest, ppszDestEnd, pcchRemaining, dwFlags, pszFormat, locale, …) + "StringCbPrintf", //StringCbPrintf(pszDest, cbDest, pszFormat, …) + "StringCbPrintfEx", //StringCbPrintfEx(pszDest, cbDest, ppszDestEnd, pcbRemaining, dwFlags, pszFormat, …) + "StringCbPrintf_l", //StringCbPrintf_l(pszDest, cbDest, pszFormat, locale, …) + "StringCbPrintf_lEx" //StringCbPrintf_lEx(pszDest, cbDest, ppszDestEnd, pcbRemaining, dwFlags, pszFormat, locale, …) + ] + | + this.hasGlobalName(baseName + ["", "A", "W"]) + ) and not exists(this.getDefinition().getFile().getRelativePath()) } override int getFormatParameterIndex() { - if this.getName().matches("%Ex") then result = 5 else result = 2 + if this.getName().matches("%Ex" + ["", "A", "W"]) then result = 5 else result = 2 } override int getOutputParameterIndex(boolean isStream) { result = 0 and isStream = false } From 3ebeb03ea0bee9a4c391d32c8560e1a27c07824d Mon Sep 17 00:00:00 2001 From: Benjamin Rodes Date: Wed, 8 Nov 2023 14:38:31 -0500 Subject: [PATCH 02/10] Prooof of concept change to NonConstantFormat.ql --- .../Likely Bugs/Format/NonConstantFormat.ql | 235 +++++++----------- 1 file changed, 94 insertions(+), 141 deletions(-) diff --git a/cpp/ql/src/Likely Bugs/Format/NonConstantFormat.ql b/cpp/ql/src/Likely Bugs/Format/NonConstantFormat.ql index 830859b18ff2..1724db35e01a 100644 --- a/cpp/ql/src/Likely Bugs/Format/NonConstantFormat.ql +++ b/cpp/ql/src/Likely Bugs/Format/NonConstantFormat.ql @@ -15,144 +15,97 @@ * external/cwe/cwe-134 */ -import semmle.code.cpp.ir.dataflow.TaintTracking -import semmle.code.cpp.commons.Printf - -// For the following `...gettext` functions, we assume that -// all translations preserve the type and order of `%` specifiers -// (and hence are safe to use as format strings). This -// assumption is hard-coded into the query. -predicate whitelistFunction(Function f, int arg) { - // basic variations of gettext - f.getName() = "_" and arg = 0 - or - f.getName() = "gettext" and arg = 0 - or - f.getName() = "dgettext" and arg = 1 - or - f.getName() = "dcgettext" and arg = 1 - or - // plural variations of gettext that take one format string for singular and another for plural form - f.getName() = "ngettext" and - (arg = 0 or arg = 1) - or - f.getName() = "dngettext" and - (arg = 1 or arg = 2) - or - f.getName() = "dcngettext" and - (arg = 1 or arg = 2) -} - -// we assume that ALL uses of the `_` macro -// return constant string literals -predicate underscoreMacro(Expr e) { - exists(MacroInvocation mi | - mi.getMacroName() = "_" and - mi.getExpr() = e - ) -} - -/** - * Holds if `t` cannot hold a character array, directly or indirectly. - */ -predicate cannotContainString(Type t, boolean isIndirect) { - isIndirect = false and - exists(Type unspecified | - unspecified = t.getUnspecifiedType() and - not unspecified instanceof UnknownType - | - unspecified instanceof BuiltInType or - unspecified instanceof IntegralOrEnumType - ) -} - -predicate isNonConst(DataFlow::Node node, boolean isIndirect) { - exists(Expr e | - e = node.asExpr() and isIndirect = false - or - e = node.asIndirectExpr() and isIndirect = true - | - exists(FunctionCall fc | fc = e | - not ( - whitelistFunction(fc.getTarget(), _) or - fc.getTarget().hasDefinition() - ) - ) - or - exists(Parameter p | p = e.(VariableAccess).getTarget() | - p.getFunction().getName() = "main" and p.getType() instanceof PointerType - ) - or - e instanceof CrementOperation - or - e instanceof AddressOfExpr - or - e instanceof ReferenceToExpr - or - e instanceof AssignPointerAddExpr - or - e instanceof AssignPointerSubExpr - or - e instanceof PointerArithmeticOperation - or - e instanceof FieldAccess - or - e instanceof PointerDereferenceExpr - or - e instanceof AddressOfExpr - or - e instanceof ExprCall - or - e instanceof NewArrayExpr - or - exists(Variable v | v = e.(VariableAccess).getTarget() | - v.getType().(ArrayType).getBaseType() instanceof CharType and - exists(AssignExpr ae | - ae.getLValue().(ArrayExpr).getArrayBase().(VariableAccess).getTarget() = v - ) - ) - ) - or - node instanceof DataFlow::DefinitionByReferenceNode and - isIndirect = true -} - -pragma[noinline] -predicate isBarrierNode(DataFlow::Node node) { - underscoreMacro([node.asExpr(), node.asIndirectExpr()]) - or - exists(node.asExpr()) and - cannotContainString(node.getType(), false) -} - -predicate isSinkImpl(DataFlow::Node sink, Expr formatString) { - [sink.asExpr(), sink.asIndirectExpr()] = formatString and - exists(FormattingFunctionCall fc | formatString = fc.getArgument(fc.getFormatParameterIndex())) -} - -module NonConstFlowConfig implements DataFlow::ConfigSig { - predicate isSource(DataFlow::Node source) { - exists(boolean isIndirect, Type t | - isNonConst(source, isIndirect) and - t = source.getType() and - not cannotContainString(t, isIndirect) - ) - } - - predicate isSink(DataFlow::Node sink) { isSinkImpl(sink, _) } - - predicate isBarrier(DataFlow::Node node) { isBarrierNode(node) } -} - -module NonConstFlow = TaintTracking::Global; - -from FormattingFunctionCall call, Expr formatString -where - call.getArgument(call.getFormatParameterIndex()) = formatString and - exists(DataFlow::Node sink | - NonConstFlow::flowTo(sink) and - isSinkImpl(sink, formatString) - ) -select formatString, - "The format string argument to " + call.getTarget().getName() + - " should be constant to prevent security issues and other potential errors." + import semmle.code.cpp.ir.dataflow.TaintTracking + import semmle.code.cpp.security.FlowSources + + class UncalledFunction extends Function { + UncalledFunction() { + not exists(Call c | c.getTarget() = this) + } + } + + // For the following `...gettext` functions, we assume that + // all translations preserve the type and order of `%` specifiers + // (and hence are safe to use as format strings). This + // assumption is hard-coded into the query. + predicate whitelistFunction(Function f, int arg) { + // basic variations of gettext + f.getName() = "_" and arg = 0 + or + f.getName() = "gettext" and arg = 0 + or + f.getName() = "dgettext" and arg = 1 + or + f.getName() = "dcgettext" and arg = 1 + or + // plural variations of gettext that take one format string for singular and another for plural form + f.getName() = "ngettext" and + (arg = 0 or arg = 1) + or + f.getName() = "dngettext" and + (arg = 1 or arg = 2) + or + f.getName() = "dcngettext" and + (arg = 1 or arg = 2) + } + + + predicate isNonConst(DataFlow::Node node){ + exists(Call fc | fc = [node.asExpr(), node.asIndirectExpr()] | + not ( + whitelistFunction(fc.getTarget(), _) or + fc.getTarget().hasDefinition() + ) + ) + or + exists(UncalledFunction f | f.getAParameter() = node.asParameter()) + or + ( + node instanceof DataFlow::DefinitionByReferenceNode and + not exists(FormattingFunctionCall fc | node.asDefiningArgument() = fc.getOutputArgument(_)) and + not exists(Call c | c.getAnArgument() = node.asDefiningArgument() and c.getTarget().hasDefinition()) + ) + or node instanceof FlowSource + } + + + + predicate isSinkImpl(DataFlow::Node sink, Expr formatString) { + [sink.asExpr(), sink.asIndirectExpr()] = formatString and + exists(FormattingFunctionCall fc | formatString = fc.getArgument(fc.getFormatParameterIndex())) + } + + + module NonConstFlowConfig implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { + isNonConst(source) + } + + predicate isSink(DataFlow::Node sink) { isSinkImpl(sink, _) } + + predicate isAdditionalFlowStep(DataFlow::Node n1, DataFlow::Node n2){ + exists(Call c, int ind | + whitelistFunction(c.getTarget(), ind) + and c.getArgument(ind) = [n1.asExpr(), n1.asIndirectExpr()] + and n2.asIndirectExpr() = c + ) + + } + + } + + module NonConstFlow = TaintTracking::Global; + + + from FormattingFunctionCall call, Expr formatString + where + call.getArgument(call.getFormatParameterIndex()) = formatString and + exists(DataFlow::Node sink | + NonConstFlow::flowTo(sink) and + isSinkImpl(sink, formatString) + ) + select formatString, + "The format string argument to " + call.getTarget().getName() + + " should be constant to prevent security issues and other potential errors." + + \ No newline at end of file From c14bef3518f0110f6be394ebac9ec5e501ec9f1d Mon Sep 17 00:00:00 2001 From: Benjamin Rodes Date: Wed, 8 Nov 2023 12:20:53 -0500 Subject: [PATCH 03/10] Adding missing strsafe sprintf variants. (cherry picked from commit bdae2af0e2f4bc39fd717b42533681b5f1ef2452) --- .../cpp/models/implementations/Printf.qll | 23 +++++++++++++++---- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/cpp/ql/lib/semmle/code/cpp/models/implementations/Printf.qll b/cpp/ql/lib/semmle/code/cpp/models/implementations/Printf.qll index f0a25dfa30d4..8e4dc32013af 100644 --- a/cpp/ql/lib/semmle/code/cpp/models/implementations/Printf.qll +++ b/cpp/ql/lib/semmle/code/cpp/models/implementations/Printf.qll @@ -147,19 +147,32 @@ private class SnprintfImpl extends Snprintf { /** * The Microsoft `StringCchPrintf` function and variants. + * See: https://learn.microsoft.com/en-us/windows/win32/api/strsafe/ + * and + * https://learn.microsoft.com/en-us/previous-versions/windows/embedded/ms860435(v=msdn.10) */ private class StringCchPrintf extends FormattingFunction { StringCchPrintf() { this instanceof TopLevelFunction and - this.hasGlobalName([ - "StringCchPrintf", "StringCchPrintfEx", "StringCchPrintf_l", "StringCchPrintf_lEx", - "StringCbPrintf", "StringCbPrintfEx", "StringCbPrintf_l", "StringCbPrintf_lEx" - ]) and + exists(string baseName | + baseName in [ + "StringCchPrintf", //StringCchPrintf(pszDest, cchDest, pszFormat, …) + "StringCchPrintfEx", //StringCchPrintfEx(pszDest,cchDest, ppszDestEnd, pcchRemaining, dwFlags, pszFormat, ...); + "StringCchPrintf_l", //StringCchPrintf_l(pszDest, cbDest, pszFormat, locale, …) + "StringCchPrintf_lEx", //StringCchPrintf_lEx(pszDest, cchDest, ppszDestEnd, pcchRemaining, dwFlags, pszFormat, locale, …) + "StringCbPrintf", //StringCbPrintf(pszDest, cbDest, pszFormat, …) + "StringCbPrintfEx", //StringCbPrintfEx(pszDest, cbDest, ppszDestEnd, pcbRemaining, dwFlags, pszFormat, …) + "StringCbPrintf_l", //StringCbPrintf_l(pszDest, cbDest, pszFormat, locale, …) + "StringCbPrintf_lEx" //StringCbPrintf_lEx(pszDest, cbDest, ppszDestEnd, pcbRemaining, dwFlags, pszFormat, locale, …) + ] + | + this.hasGlobalName(baseName + ["", "A", "W"]) + ) and not exists(this.getDefinition().getFile().getRelativePath()) } override int getFormatParameterIndex() { - if this.getName().matches("%Ex") then result = 5 else result = 2 + if this.getName().matches("%Ex" + ["", "A", "W"]) then result = 5 else result = 2 } override int getOutputParameterIndex(boolean isStream) { result = 0 and isStream = false } From 0555ce4b4a40c9cd854c73cab6a8fac38b082bad Mon Sep 17 00:00:00 2001 From: Benjamin Rodes Date: Tue, 14 Nov 2023 11:27:57 -0500 Subject: [PATCH 04/10] Experimental non-literal tracing approach. --- .../Likely Bugs/Format/NonConstantFormat.ql | 68 +++++++++++++++---- 1 file changed, 54 insertions(+), 14 deletions(-) diff --git a/cpp/ql/src/Likely Bugs/Format/NonConstantFormat.ql b/cpp/ql/src/Likely Bugs/Format/NonConstantFormat.ql index 1724db35e01a..7dc21542d766 100644 --- a/cpp/ql/src/Likely Bugs/Format/NonConstantFormat.ql +++ b/cpp/ql/src/Likely Bugs/Format/NonConstantFormat.ql @@ -89,23 +89,63 @@ and c.getArgument(ind) = [n1.asExpr(), n1.asIndirectExpr()] and n2.asIndirectExpr() = c ) - } - } module NonConstFlow = TaintTracking::Global; + +module LiteralToFormatConfig implements DataFlow::ConfigSig{ + predicate isSource(DataFlow::Node source) { + source.asExpr() instanceof StringLiteral + } + + predicate isSink(DataFlow::Node sink) { + isSinkImpl(sink, _) + } + + predicate isAdditionalFlowStep(DataFlow::Node n1, DataFlow::Node n2){ + exists(Call c, int ind | + whitelistFunction(c.getTarget(), ind) + and c.getArgument(ind) = [n1.asExpr(), n1.asIndirectExpr()] + and n2.asIndirectExpr() = c + ) + } +} + +module LiteralToFormatTrace = TaintTracking::Global; + +predicate isLiteralFormat(Expr fmt){ + exists(DataFlow::Node src, DataFlow::Node sink | + LiteralToFormatTrace::flow(src,sink) + and + isSinkImpl(sink, fmt) + ) +} + +predicate isNonLiteralfmt(Expr fmt){ + exists(DataFlow::Node sink | isSinkImpl(sink, fmt)) + and + not isLiteralFormat(fmt) +} + - from FormattingFunctionCall call, Expr formatString - where - call.getArgument(call.getFormatParameterIndex()) = formatString and - exists(DataFlow::Node sink | - NonConstFlow::flowTo(sink) and - isSinkImpl(sink, formatString) - ) - select formatString, - "The format string argument to " + call.getTarget().getName() + - " should be constant to prevent security issues and other potential errors." - - \ No newline at end of file +// from FormattingFunctionCall call, Expr formatString +// where +// call.getArgument(call.getFormatParameterIndex()) = formatString and +// exists(DataFlow::Node sink | +// NonConstFlow::flowTo(sink) and +// isSinkImpl(sink, formatString) +// ) +// select formatString, +// "The format string argument to " + call.getTarget().getName() + +// " should be constant to prevent security issues and other potential errors." + +from FormattingFunctionCall call, Expr formatString +where + call.getArgument(call.getFormatParameterIndex()) = formatString and + isNonLiteralfmt(formatString) +select formatString, + "The format string argument to " + call.getTarget().getName() + + " should be constant to prevent security issues and other potential errors." + From 49532def16968916c1efaff3127c503683eb159f Mon Sep 17 00:00:00 2001 From: Benjamin Rodes Date: Tue, 14 Nov 2023 12:07:38 -0800 Subject: [PATCH 05/10] Hybrid approach --- cpp/ql/src/Likely Bugs/Format/NonConstantFormat.ql | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/cpp/ql/src/Likely Bugs/Format/NonConstantFormat.ql b/cpp/ql/src/Likely Bugs/Format/NonConstantFormat.ql index 7dc21542d766..b5a80bebdc55 100644 --- a/cpp/ql/src/Likely Bugs/Format/NonConstantFormat.ql +++ b/cpp/ql/src/Likely Bugs/Format/NonConstantFormat.ql @@ -144,7 +144,14 @@ predicate isNonLiteralfmt(Expr fmt){ from FormattingFunctionCall call, Expr formatString where call.getArgument(call.getFormatParameterIndex()) = formatString and - isNonLiteralfmt(formatString) + ( + isNonLiteralfmt(formatString) + or + exists(DataFlow::Node sink | + NonConstFlow::flowTo(sink) and + isSinkImpl(sink, formatString) + ) + ) select formatString, "The format string argument to " + call.getTarget().getName() + " should be constant to prevent security issues and other potential errors." From 5b72823fc39719215f9aed4cb2c89f01723d805a Mon Sep 17 00:00:00 2001 From: Mathias Vorreiter Pedersen Date: Fri, 10 Nov 2023 14:25:23 +0000 Subject: [PATCH 06/10] Merge pull request #14736 from MathiasVP/fix-global-indirect-flow C++: Fix indirect global-variable flow --- .../ir/dataflow/internal/DataFlowPrivate.qll | 8 +-- .../cpp/ir/dataflow/internal/SsaInternals.qll | 52 +++++++++++++------ .../dataflow-consistency.expected | 4 ++ .../dataflow/dataflow-tests/test.cpp | 40 ++++++++++++++ .../dataflow-ir-consistency.expected | 7 ++- 5 files changed, 89 insertions(+), 22 deletions(-) diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll index fa54c9c736a3..e3f30b07dad0 100644 --- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll +++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll @@ -632,20 +632,20 @@ predicate jumpStep(Node n1, Node n2) { v = globalUse.getVariable() and n1.(FinalGlobalValue).getGlobalUse() = globalUse | - globalUse.getIndirectionIndex() = 1 and + globalUse.getIndirection() = 1 and v = n2.asVariable() or - v = n2.asIndirectVariable(globalUse.getIndirectionIndex()) + v = n2.asIndirectVariable(globalUse.getIndirection()) ) or exists(Ssa::GlobalDef globalDef | v = globalDef.getVariable() and n2.(InitialGlobalValue).getGlobalDef() = globalDef | - globalDef.getIndirectionIndex() = 1 and + globalDef.getIndirection() = 1 and v = n1.asVariable() or - v = n1.asIndirectVariable(globalDef.getIndirectionIndex()) + v = n1.asIndirectVariable(globalDef.getIndirection()) ) ) } diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/SsaInternals.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/SsaInternals.qll index b6afadfe0e1b..76741dfc5cc8 100644 --- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/SsaInternals.qll +++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/SsaInternals.qll @@ -113,22 +113,12 @@ private newtype TDefOrUseImpl = TGlobalUse(GlobalLikeVariable v, IRFunction f, int indirectionIndex) { // Represents a final "use" of a global variable to ensure that // the assignment to a global variable isn't ruled out as dead. - exists(VariableAddressInstruction vai, int defIndex | - vai.getEnclosingIRFunction() = f and - vai.getAstVariable() = v and - isDef(_, _, _, vai, _, defIndex) and - indirectionIndex = [0 .. defIndex] + 1 - ) + isGlobalUse(v, f, _, indirectionIndex) } or TGlobalDefImpl(GlobalLikeVariable v, IRFunction f, int indirectionIndex) { // Represents the initial "definition" of a global variable when entering // a function body. - exists(VariableAddressInstruction vai | - vai.getEnclosingIRFunction() = f and - vai.getAstVariable() = v and - isUse(_, _, vai, _, indirectionIndex) and - not isDef(_, _, vai.getAUse(), _, _, _) - ) + isGlobalDefImpl(v, f, _, indirectionIndex) } or TIteratorDef( Operand iteratorDerefAddress, BaseSourceVariableInstruction container, int indirectionIndex @@ -150,6 +140,27 @@ private newtype TDefOrUseImpl = ) } +private predicate isGlobalUse( + GlobalLikeVariable v, IRFunction f, int indirection, int indirectionIndex +) { + exists(VariableAddressInstruction vai | + vai.getEnclosingIRFunction() = f and + vai.getAstVariable() = v and + isDef(_, _, _, vai, indirection, indirectionIndex) + ) +} + +private predicate isGlobalDefImpl( + GlobalLikeVariable v, IRFunction f, int indirection, int indirectionIndex +) { + exists(VariableAddressInstruction vai | + vai.getEnclosingIRFunction() = f and + vai.getAstVariable() = v and + isUse(_, _, vai, indirection, indirectionIndex) and + not isDef(_, _, _, vai, _, indirectionIndex) + ) +} + private predicate unspecifiedTypeIsModifiableAt(Type unspecified, int indirectionIndex) { indirectionIndex = [1 .. getIndirectionForUnspecifiedType(unspecified).getNumberOfIndirections()] and exists(CppType cppType | @@ -438,7 +449,7 @@ class GlobalUse extends UseImpl, TGlobalUse { override FinalGlobalValue getNode() { result.getGlobalUse() = this } - override int getIndirection() { result = ind + 1 } + override int getIndirection() { isGlobalUse(global, f, result, ind) } /** Gets the global variable associated with this use. */ GlobalLikeVariable getVariable() { result = global } @@ -460,7 +471,9 @@ class GlobalUse extends UseImpl, TGlobalUse { ) } - override SourceVariable getSourceVariable() { sourceVariableIsGlobal(result, global, f, ind) } + override SourceVariable getSourceVariable() { + sourceVariableIsGlobal(result, global, f, this.getIndirection()) + } final override Cpp::Location getLocation() { result = f.getLocation() } @@ -501,16 +514,18 @@ class GlobalDefImpl extends DefOrUseImpl, TGlobalDefImpl { /** Gets the global variable associated with this definition. */ override SourceVariable getSourceVariable() { - sourceVariableIsGlobal(result, global, f, indirectionIndex) + sourceVariableIsGlobal(result, global, f, this.getIndirection()) } + int getIndirection() { result = indirectionIndex } + /** * Gets the type of this use after specifiers have been deeply stripped * and typedefs have been resolved. */ Type getUnspecifiedType() { result = global.getUnspecifiedType() } - override string toString() { result = "GlobalDef" } + override string toString() { result = "Def of " + this.getSourceVariable() } override Location getLocation() { result = f.getLocation() } @@ -980,7 +995,7 @@ class GlobalDef extends TGlobalDef, SsaDefOrUse { final override Location getLocation() { result = global.getLocation() } /** Gets a textual representation of this definition. */ - override string toString() { result = "GlobalDef" } + override string toString() { result = global.toString() } /** * Holds if this definition has index `index` in block `block`, and @@ -990,6 +1005,9 @@ class GlobalDef extends TGlobalDef, SsaDefOrUse { global.hasIndexInBlock(block, index, sv) } + /** Gets the indirection index of this definition. */ + int getIndirection() { result = global.getIndirection() } + /** Gets the indirection index of this definition. */ int getIndirectionIndex() { result = global.getIndirectionIndex() } diff --git a/cpp/ql/test/library-tests/dataflow/dataflow-tests/dataflow-consistency.expected b/cpp/ql/test/library-tests/dataflow/dataflow-tests/dataflow-consistency.expected index e5c884beb225..7d25b5f79627 100644 --- a/cpp/ql/test/library-tests/dataflow/dataflow-tests/dataflow-consistency.expected +++ b/cpp/ql/test/library-tests/dataflow/dataflow-tests/dataflow-consistency.expected @@ -23,6 +23,7 @@ argHasPostUpdate | lambdas.cpp:38:2:38:2 | d | ArgumentNode is missing PostUpdateNode. | | lambdas.cpp:45:2:45:2 | e | ArgumentNode is missing PostUpdateNode. | | test.cpp:67:29:67:35 | source1 | ArgumentNode is missing PostUpdateNode. | +| test.cpp:813:19:813:35 | * ... | ArgumentNode is missing PostUpdateNode. | postWithInFlow | BarrierGuard.cpp:49:6:49:6 | x [post update] | PostUpdateNode should not be the target of local flow. | | BarrierGuard.cpp:60:7:60:7 | x [post update] | PostUpdateNode should not be the target of local flow. | @@ -133,6 +134,9 @@ postWithInFlow | test.cpp:728:3:728:4 | * ... [post update] | PostUpdateNode should not be the target of local flow. | | test.cpp:728:4:728:4 | p [inner post update] | PostUpdateNode should not be the target of local flow. | | test.cpp:734:41:734:41 | x [inner post update] | PostUpdateNode should not be the target of local flow. | +| test.cpp:808:5:808:21 | * ... [post update] | PostUpdateNode should not be the target of local flow. | +| test.cpp:808:6:808:21 | global_indirect1 [inner post update] | PostUpdateNode should not be the target of local flow. | +| test.cpp:832:5:832:17 | global_direct [post update] | PostUpdateNode should not be the target of local flow. | viableImplInCallContextTooLarge uniqueParameterNodeAtPosition uniqueParameterNodePosition diff --git a/cpp/ql/test/library-tests/dataflow/dataflow-tests/test.cpp b/cpp/ql/test/library-tests/dataflow/dataflow-tests/test.cpp index c5f7ffcf1603..73c9fd28b933 100644 --- a/cpp/ql/test/library-tests/dataflow/dataflow-tests/test.cpp +++ b/cpp/ql/test/library-tests/dataflow/dataflow-tests/test.cpp @@ -796,4 +796,44 @@ void test() { MyStruct a; intPointerSource(a.content, a.content); indirect_sink(a.content); // $ ast ir +} + +namespace MoreGlobalTests { + int **global_indirect1; + int **global_indirect2; + int **global_direct; + + void set_indirect1() + { + *global_indirect1 = indirect_source(); + } + + void read_indirect1() { + sink(global_indirect1); // clean + indirect_sink(*global_indirect1); // $ ir MISSING: ast + } + + void set_indirect2() + { + **global_indirect2 = source(); + } + + void read_indirect2() { + sink(global_indirect2); // clean + sink(**global_indirect2); // $ ir MISSING: ast + } + + // overload source with a boolean parameter so + // that we can define a variant that return an int**. + int** source(bool); + + void set_direct() + { + global_direct = source(true); + } + + void read_direct() { + sink(global_direct); // $ ir MISSING: ast + indirect_sink(global_direct); // clean + } } \ No newline at end of file diff --git a/cpp/ql/test/library-tests/syntax-zoo/dataflow-ir-consistency.expected b/cpp/ql/test/library-tests/syntax-zoo/dataflow-ir-consistency.expected index f3c6737ad169..41e9f34e243f 100644 --- a/cpp/ql/test/library-tests/syntax-zoo/dataflow-ir-consistency.expected +++ b/cpp/ql/test/library-tests/syntax-zoo/dataflow-ir-consistency.expected @@ -4,7 +4,12 @@ uniqueType uniqueNodeLocation missingLocation uniqueNodeToString -| cpp11.cpp:50:15:50:16 | (no string representation) | Node should have one toString but has 0. | +| builtin.c:5:5:5:11 | (no string representation) | Node should have one toString but has 0. | +| misc.c:227:7:227:28 | (no string representation) | Node should have one toString but has 0. | +| static_init_templates.cpp:80:18:80:23 | (no string representation) | Node should have one toString but has 0. | +| static_init_templates.cpp:80:18:80:23 | (no string representation) | Node should have one toString but has 0. | +| static_init_templates.cpp:89:18:89:23 | (no string representation) | Node should have one toString but has 0. | +| static_init_templates.cpp:89:18:89:23 | (no string representation) | Node should have one toString but has 0. | parameterCallable localFlowIsLocal readStepIsLocal From 5e2db07a37af460b3f22d2865b9165394767e3c8 Mon Sep 17 00:00:00 2001 From: Mathias Vorreiter Pedersen Date: Thu, 16 Nov 2023 16:32:08 +0000 Subject: [PATCH 07/10] C++: Fix global variable flow for array types. --- .../ir/dataflow/internal/DataFlowPrivate.qll | 24 +- .../cpp/ir/dataflow/internal/DataFlowUtil.qll | 18 +- .../dataflow-tests/test-source-sink.expected | 306 ++++++++++++++++++ .../dataflow/dataflow-tests/test.cpp | 88 ++++- 4 files changed, 430 insertions(+), 6 deletions(-) create mode 100644 cpp/ql/test/library-tests/dataflow/dataflow-tests/test-source-sink.expected diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll index e3f30b07dad0..993c9ed4baba 100644 --- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll +++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll @@ -621,6 +621,26 @@ class GlobalLikeVariable extends Variable { } } +/** + * Returns the smallest indirection for `use`. + * + * For most types this is `1`, but for `ArrayType`s (which are allocated on + * the stack) this is `0` + */ +private int getLowerForGlobalUse(Ssa::GlobalUse use) { + if use.getUnspecifiedType() instanceof Cpp::ArrayType then result = 0 else result = 1 +} + +/** + * Returns the smallest indirection for `def`. + * + * For most types this is `1`, but for `ArrayType`s (which are allocated on + * the stack) this is `0` + */ +private int getLowerForGlobalDef(Ssa::GlobalDef def) { + if def.getUnspecifiedType() instanceof Cpp::ArrayType then result = 0 else result = 1 +} + /** * Holds if data can flow from `node1` to `node2` in a way that loses the * calling context. For example, this would happen with flow through a @@ -632,7 +652,7 @@ predicate jumpStep(Node n1, Node n2) { v = globalUse.getVariable() and n1.(FinalGlobalValue).getGlobalUse() = globalUse | - globalUse.getIndirection() = 1 and + globalUse.getIndirection() = getLowerForGlobalUse(globalUse) and v = n2.asVariable() or v = n2.asIndirectVariable(globalUse.getIndirection()) @@ -642,7 +662,7 @@ predicate jumpStep(Node n1, Node n2) { v = globalDef.getVariable() and n2.(InitialGlobalValue).getGlobalDef() = globalDef | - globalDef.getIndirection() = 1 and + globalDef.getIndirection() = getLowerForGlobalDef(globalDef) and v = n1.asVariable() or v = n1.asIndirectVariable(globalDef.getIndirection()) diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll index 992e995094e3..75d466548d09 100644 --- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll +++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll @@ -34,7 +34,11 @@ cached private newtype TIRDataFlowNode = TNode0(Node0Impl node) { DataFlowImplCommon::forceCachingInSameStage() } or TVariableNode(Variable var, int indirectionIndex) { - indirectionIndex = [1 .. Ssa::getMaxIndirectionsForType(var.getUnspecifiedType())] + exists(int lower | + if var.getUnspecifiedType() instanceof ArrayType then lower = 0 else lower = 1 + | + indirectionIndex = [lower .. Ssa::getMaxIndirectionsForType(var.getUnspecifiedType())] + ) } or TPostFieldUpdateNode(FieldAddress operand, int indirectionIndex) { indirectionIndex = @@ -345,7 +349,11 @@ class Node extends TIRDataFlowNode { * Gets the variable corresponding to this node, if any. This can be used for * modeling flow in and out of global variables. */ - Variable asVariable() { this = TVariableNode(result, 1) } + Variable asVariable() { + if result.getUnspecifiedType() instanceof ArrayType + then this = TVariableNode(result, 0) + else this = TVariableNode(result, 1) + } /** * Gets the `indirectionIndex`'th indirection of this node's underlying variable, if any. @@ -353,7 +361,11 @@ class Node extends TIRDataFlowNode { * This can be used for modeling flow in and out of global variables. */ Variable asIndirectVariable(int indirectionIndex) { - indirectionIndex > 1 and + ( + if result.getUnspecifiedType() instanceof ArrayType + then indirectionIndex > 0 + else indirectionIndex > 1 + ) and this = TVariableNode(result, indirectionIndex) } diff --git a/cpp/ql/test/library-tests/dataflow/dataflow-tests/test-source-sink.expected b/cpp/ql/test/library-tests/dataflow/dataflow-tests/test-source-sink.expected new file mode 100644 index 000000000000..c98bc68c884d --- /dev/null +++ b/cpp/ql/test/library-tests/dataflow/dataflow-tests/test-source-sink.expected @@ -0,0 +1,306 @@ +WARNING: Module DataFlow has been deprecated and may be removed in future (test-source-sink.ql:3,25-42) +WARNING: Module DataFlow has been deprecated and may be removed in future (test-source-sink.ql:3,57-74) +astFlow +| BarrierGuard.cpp:5:19:5:24 | source | BarrierGuard.cpp:9:10:9:15 | source | +| BarrierGuard.cpp:13:17:13:22 | source | BarrierGuard.cpp:15:10:15:15 | source | +| BarrierGuard.cpp:21:17:21:22 | source | BarrierGuard.cpp:25:10:25:15 | source | +| BarrierGuard.cpp:29:16:29:21 | source | BarrierGuard.cpp:31:10:31:15 | source | +| BarrierGuard.cpp:29:16:29:21 | source | BarrierGuard.cpp:33:10:33:15 | source | +| BarrierGuard.cpp:49:10:49:15 | call to source | BarrierGuard.cpp:51:13:51:13 | x | +| BarrierGuard.cpp:49:10:49:15 | call to source | BarrierGuard.cpp:53:13:53:13 | x | +| BarrierGuard.cpp:49:10:49:15 | call to source | BarrierGuard.cpp:55:13:55:13 | x | +| BarrierGuard.cpp:60:11:60:16 | call to source | BarrierGuard.cpp:62:14:62:14 | x | +| BarrierGuard.cpp:60:11:60:16 | call to source | BarrierGuard.cpp:64:14:64:14 | x | +| BarrierGuard.cpp:60:11:60:16 | call to source | BarrierGuard.cpp:66:14:66:14 | x | +| acrossLinkTargets.cpp:19:27:19:32 | call to source | acrossLinkTargets.cpp:12:8:12:8 | x | +| clang.cpp:12:9:12:20 | sourceArray1 | clang.cpp:18:8:18:19 | sourceArray1 | +| clang.cpp:12:9:12:20 | sourceArray1 | clang.cpp:22:8:22:20 | & ... | +| clang.cpp:12:9:12:20 | sourceArray1 | clang.cpp:23:17:23:29 | & ... | +| clang.cpp:29:27:29:32 | call to source | clang.cpp:30:27:30:28 | m1 | +| clang.cpp:29:27:29:32 | call to source | clang.cpp:31:27:31:34 | call to getFirst | +| clang.cpp:35:32:35:37 | call to source | clang.cpp:38:10:38:11 | m2 | +| clang.cpp:44:35:44:40 | call to source | clang.cpp:46:17:46:18 | m2 | +| clang.cpp:51:19:51:24 | call to source | clang.cpp:52:8:52:17 | stackArray | +| clang.cpp:51:19:51:24 | call to source | clang.cpp:53:17:53:26 | stackArray | +| dispatch.cpp:9:37:9:42 | call to source | dispatch.cpp:35:16:35:25 | call to notSource1 | +| dispatch.cpp:9:37:9:42 | call to source | dispatch.cpp:43:15:43:24 | call to notSource1 | +| dispatch.cpp:10:37:10:42 | call to source | dispatch.cpp:36:16:36:25 | call to notSource2 | +| dispatch.cpp:10:37:10:42 | call to source | dispatch.cpp:44:15:44:24 | call to notSource2 | +| dispatch.cpp:37:19:37:24 | call to source | dispatch.cpp:11:38:11:38 | x | +| dispatch.cpp:45:18:45:23 | call to source | dispatch.cpp:11:38:11:38 | x | +| globals.cpp:5:17:5:22 | call to source | globals.cpp:6:10:6:14 | local | +| lambdas.cpp:8:10:8:15 | call to source | lambdas.cpp:14:3:14:6 | t | +| lambdas.cpp:8:10:8:15 | call to source | lambdas.cpp:18:8:18:8 | call to operator() | +| lambdas.cpp:8:10:8:15 | call to source | lambdas.cpp:21:3:21:6 | t | +| lambdas.cpp:8:10:8:15 | call to source | lambdas.cpp:29:3:29:6 | t | +| lambdas.cpp:8:10:8:15 | call to source | lambdas.cpp:35:8:35:8 | a | +| lambdas.cpp:8:10:8:15 | call to source | lambdas.cpp:41:8:41:8 | a | +| lambdas.cpp:43:7:43:12 | call to source | lambdas.cpp:46:7:46:7 | w | +| ref.cpp:29:11:29:16 | call to source | ref.cpp:62:10:62:11 | x3 | +| ref.cpp:53:9:53:10 | x1 | ref.cpp:56:10:56:11 | x1 | +| ref.cpp:53:13:53:14 | x2 | ref.cpp:59:10:59:11 | x2 | +| ref.cpp:53:17:53:18 | x3 | ref.cpp:62:10:62:11 | x3 | +| ref.cpp:53:21:53:22 | x4 | ref.cpp:65:10:65:11 | x4 | +| ref.cpp:55:23:55:28 | call to source | ref.cpp:56:10:56:11 | x1 | +| ref.cpp:94:15:94:20 | call to source | ref.cpp:129:13:129:15 | val | +| ref.cpp:109:15:109:20 | call to source | ref.cpp:132:13:132:15 | val | +| ref.cpp:122:23:122:28 | call to source | ref.cpp:123:13:123:15 | val | +| ref.cpp:125:19:125:24 | call to source | ref.cpp:126:13:126:15 | val | +| self-Iterator.cpp:19:23:19:28 | call to source | self-Iterator.cpp:20:10:20:10 | x | +| test.cpp:6:12:6:17 | call to source | test.cpp:7:8:7:9 | t1 | +| test.cpp:6:12:6:17 | call to source | test.cpp:9:8:9:9 | t1 | +| test.cpp:6:12:6:17 | call to source | test.cpp:10:8:10:9 | t2 | +| test.cpp:6:12:6:17 | call to source | test.cpp:15:8:15:9 | t2 | +| test.cpp:6:12:6:17 | call to source | test.cpp:26:8:26:9 | t1 | +| test.cpp:35:10:35:15 | call to source | test.cpp:30:8:30:8 | t | +| test.cpp:36:13:36:18 | call to source | test.cpp:31:8:31:8 | c | +| test.cpp:50:14:50:19 | call to source | test.cpp:58:10:58:10 | t | +| test.cpp:66:30:66:36 | source1 | test.cpp:71:8:71:9 | x4 | +| test.cpp:75:7:75:8 | u1 | test.cpp:76:8:76:9 | u1 | +| test.cpp:83:7:83:8 | u2 | test.cpp:84:8:84:18 | ... ? ... : ... | +| test.cpp:83:7:83:8 | u2 | test.cpp:86:8:86:9 | i1 | +| test.cpp:89:28:89:34 | source1 | test.cpp:90:8:90:14 | source1 | +| test.cpp:100:13:100:18 | call to source | test.cpp:103:10:103:12 | ref | +| test.cpp:138:27:138:32 | call to source | test.cpp:140:8:140:8 | y | +| test.cpp:151:33:151:38 | call to source | test.cpp:144:8:144:8 | s | +| test.cpp:151:33:151:38 | call to source | test.cpp:152:8:152:8 | y | +| test.cpp:164:34:164:39 | call to source | test.cpp:157:8:157:8 | x | +| test.cpp:164:34:164:39 | call to source | test.cpp:165:8:165:8 | y | +| test.cpp:171:11:171:16 | call to source | test.cpp:178:8:178:8 | y | +| test.cpp:245:14:245:19 | call to source | test.cpp:260:12:260:12 | x | +| test.cpp:265:22:265:27 | call to source | test.cpp:266:12:266:12 | x | +| test.cpp:305:17:305:22 | call to source | test.cpp:289:14:289:14 | x | +| test.cpp:314:4:314:9 | call to source | test.cpp:318:7:318:7 | x | +| test.cpp:347:17:347:22 | call to source | test.cpp:349:10:349:18 | globalVar | +| test.cpp:359:13:359:18 | call to source | test.cpp:365:10:365:14 | field | +| test.cpp:373:13:373:18 | call to source | test.cpp:369:10:369:14 | field | +| test.cpp:373:13:373:18 | call to source | test.cpp:375:10:375:14 | field | +| test.cpp:382:48:382:54 | source1 | test.cpp:385:8:385:10 | tmp | +| test.cpp:388:53:388:59 | source1 | test.cpp:392:8:392:10 | tmp | +| test.cpp:388:53:388:59 | source1 | test.cpp:394:10:394:12 | tmp | +| test.cpp:399:7:399:9 | tmp | test.cpp:401:8:401:10 | tmp | +| test.cpp:405:7:405:9 | tmp | test.cpp:408:8:408:10 | tmp | +| test.cpp:416:7:416:11 | local | test.cpp:418:8:418:12 | local | +| test.cpp:417:16:417:20 | ref arg local | test.cpp:418:8:418:12 | local | +| test.cpp:422:7:422:11 | local | test.cpp:424:8:424:12 | local | +| test.cpp:423:20:423:25 | ref arg & ... | test.cpp:424:8:424:12 | local | +| test.cpp:433:7:433:11 | local | test.cpp:435:8:435:12 | local | +| test.cpp:433:7:433:11 | local | test.cpp:436:8:436:13 | * ... | +| test.cpp:434:20:434:24 | ref arg local | test.cpp:435:8:435:12 | local | +| test.cpp:434:20:434:24 | ref arg local | test.cpp:436:8:436:13 | * ... | +| test.cpp:440:7:440:11 | local | test.cpp:442:8:442:12 | local | +| test.cpp:441:18:441:23 | ref arg & ... | test.cpp:442:8:442:12 | local | +| test.cpp:448:7:448:11 | local | test.cpp:450:8:450:12 | local | +| test.cpp:448:7:448:11 | local | test.cpp:451:8:451:13 | * ... | +| test.cpp:449:18:449:22 | ref arg local | test.cpp:450:8:450:12 | local | +| test.cpp:449:18:449:22 | ref arg local | test.cpp:451:8:451:13 | * ... | +| test.cpp:456:26:456:32 | source1 | test.cpp:457:9:457:22 | (statement expression) | +| test.cpp:456:26:456:32 | source1 | test.cpp:468:8:468:12 | local | +| test.cpp:472:8:472:13 | call to source | test.cpp:478:8:478:8 | x | +| test.cpp:506:8:506:13 | call to source | test.cpp:513:8:513:8 | x | +| test.cpp:517:7:517:16 | stackArray | test.cpp:521:8:521:20 | access to array | +| test.cpp:519:19:519:24 | call to source | test.cpp:521:8:521:20 | access to array | +| test.cpp:551:9:551:9 | y | test.cpp:541:10:541:10 | y | +| test.cpp:583:11:583:16 | call to source | test.cpp:590:8:590:8 | x | +| test.cpp:628:20:628:25 | ref arg buffer | test.cpp:629:17:629:22 | buffer | +| test.cpp:633:18:633:23 | call to source | test.cpp:634:8:634:8 | x | +| test.cpp:702:38:702:43 | source | test.cpp:695:8:695:10 | buf | +| test.cpp:726:11:726:16 | call to source | test.cpp:735:8:735:8 | x | +| test.cpp:733:7:733:7 | x | test.cpp:735:8:735:8 | x | +| test.cpp:749:27:749:32 | call to source | test.cpp:740:10:740:10 | x | +| test.cpp:751:27:751:32 | call to source | test.cpp:740:10:740:10 | x | +| test.cpp:753:32:753:37 | call to source | test.cpp:740:10:740:10 | x | +| test.cpp:755:32:755:37 | call to source | test.cpp:740:10:740:10 | x | +| test.cpp:769:27:769:32 | call to source | test.cpp:760:10:760:10 | x | +| test.cpp:771:27:771:32 | call to source | test.cpp:760:10:760:10 | x | +| test.cpp:773:32:773:37 | call to source | test.cpp:760:10:760:10 | x | +| test.cpp:775:32:775:37 | call to source | test.cpp:760:10:760:10 | x | +| test.cpp:788:31:788:36 | call to source | test.cpp:782:12:782:12 | x | +| test.cpp:790:31:790:36 | call to source | test.cpp:782:12:782:12 | x | +| test.cpp:797:22:797:28 | ref arg content | test.cpp:798:19:798:25 | content | +| test.cpp:842:11:842:16 | call to source | test.cpp:844:8:844:8 | y | +| test.cpp:846:13:846:27 | call to indirect_source | test.cpp:848:23:848:25 | rpx | +| test.cpp:860:54:860:59 | call to source | test.cpp:861:10:861:37 | static_local_pointer_dynamic | +| true_upon_entry.cpp:17:11:17:16 | call to source | true_upon_entry.cpp:21:8:21:8 | x | +| true_upon_entry.cpp:27:9:27:14 | call to source | true_upon_entry.cpp:29:8:29:8 | x | +| true_upon_entry.cpp:33:11:33:16 | call to source | true_upon_entry.cpp:39:8:39:8 | x | +| true_upon_entry.cpp:43:11:43:16 | call to source | true_upon_entry.cpp:49:8:49:8 | x | +| true_upon_entry.cpp:54:11:54:16 | call to source | true_upon_entry.cpp:57:8:57:8 | x | +| true_upon_entry.cpp:70:11:70:16 | call to source | true_upon_entry.cpp:78:8:78:8 | x | +| true_upon_entry.cpp:83:11:83:16 | call to source | true_upon_entry.cpp:86:8:86:8 | x | +irFlow +| BarrierGuard.cpp:5:19:5:24 | source | BarrierGuard.cpp:9:10:9:15 | source | +| BarrierGuard.cpp:13:17:13:22 | source | BarrierGuard.cpp:15:10:15:15 | source | +| BarrierGuard.cpp:21:17:21:22 | source | BarrierGuard.cpp:25:10:25:15 | source | +| BarrierGuard.cpp:29:16:29:21 | source | BarrierGuard.cpp:31:10:31:15 | source | +| BarrierGuard.cpp:29:16:29:21 | source | BarrierGuard.cpp:33:10:33:15 | source | +| BarrierGuard.cpp:49:10:49:15 | call to source | BarrierGuard.cpp:53:13:53:13 | x | +| BarrierGuard.cpp:49:10:49:15 | call to source | BarrierGuard.cpp:55:13:55:13 | x | +| BarrierGuard.cpp:60:11:60:16 | call to source | BarrierGuard.cpp:64:14:64:14 | x | +| BarrierGuard.cpp:60:11:60:16 | call to source | BarrierGuard.cpp:66:14:66:14 | x | +| acrossLinkTargets.cpp:19:27:19:32 | call to source | acrossLinkTargets.cpp:12:8:12:8 | x | +| clang.cpp:12:9:12:20 | sourceArray1 | clang.cpp:18:8:18:19 | sourceArray1 | +| clang.cpp:12:9:12:20 | sourceArray1 | clang.cpp:23:17:23:29 | & ... indirection | +| clang.cpp:29:27:29:32 | call to source | clang.cpp:30:27:30:28 | m1 | +| clang.cpp:29:27:29:32 | call to source | clang.cpp:31:27:31:34 | call to getFirst | +| clang.cpp:35:32:35:37 | call to source | clang.cpp:38:10:38:11 | m2 | +| clang.cpp:40:42:40:47 | call to source | clang.cpp:42:18:42:19 | m2 | +| clang.cpp:44:35:44:40 | call to source | clang.cpp:46:17:46:18 | m2 | +| clang.cpp:50:7:50:16 | definition of stackArray | clang.cpp:52:8:52:17 | stackArray | +| clang.cpp:50:25:50:30 | call to source | clang.cpp:53:17:53:26 | stackArray indirection | +| clang.cpp:50:35:50:40 | call to source | clang.cpp:53:17:53:26 | stackArray indirection | +| clang.cpp:51:19:51:24 | call to source | clang.cpp:53:17:53:26 | stackArray indirection | +| dispatch.cpp:9:37:9:42 | call to source | dispatch.cpp:35:16:35:25 | call to notSource1 | +| dispatch.cpp:9:37:9:42 | call to source | dispatch.cpp:43:15:43:24 | call to notSource1 | +| dispatch.cpp:10:37:10:42 | call to source | dispatch.cpp:36:16:36:25 | call to notSource2 | +| dispatch.cpp:10:37:10:42 | call to source | dispatch.cpp:44:15:44:24 | call to notSource2 | +| dispatch.cpp:16:37:16:42 | call to source | dispatch.cpp:32:16:32:24 | call to isSource2 | +| dispatch.cpp:16:37:16:42 | call to source | dispatch.cpp:40:15:40:23 | call to isSource2 | +| dispatch.cpp:22:37:22:42 | call to source | dispatch.cpp:31:16:31:24 | call to isSource1 | +| dispatch.cpp:22:37:22:42 | call to source | dispatch.cpp:39:15:39:23 | call to isSource1 | +| dispatch.cpp:22:37:22:42 | call to source | dispatch.cpp:55:22:55:30 | call to isSource1 | +| dispatch.cpp:22:37:22:42 | call to source | dispatch.cpp:58:28:58:36 | call to isSource1 | +| dispatch.cpp:33:18:33:23 | call to source | dispatch.cpp:23:38:23:38 | x | +| dispatch.cpp:37:19:37:24 | call to source | dispatch.cpp:11:38:11:38 | x | +| dispatch.cpp:41:17:41:22 | call to source | dispatch.cpp:23:38:23:38 | x | +| dispatch.cpp:45:18:45:23 | call to source | dispatch.cpp:11:38:11:38 | x | +| dispatch.cpp:69:15:69:20 | call to source | dispatch.cpp:23:38:23:38 | x | +| dispatch.cpp:73:14:73:19 | call to source | dispatch.cpp:23:38:23:38 | x | +| dispatch.cpp:81:13:81:18 | call to source | dispatch.cpp:23:38:23:38 | x | +| dispatch.cpp:107:17:107:22 | call to source | dispatch.cpp:96:8:96:8 | x | +| dispatch.cpp:140:8:140:13 | call to source | dispatch.cpp:96:8:96:8 | x | +| dispatch.cpp:144:8:144:13 | call to source | dispatch.cpp:96:8:96:8 | x | +| flowOut.cpp:5:16:5:21 | call to source | flowOut.cpp:19:9:19:9 | x | +| globals.cpp:5:17:5:22 | call to source | globals.cpp:6:10:6:14 | local | +| globals.cpp:13:23:13:28 | call to source | globals.cpp:12:10:12:24 | flowTestGlobal1 | +| globals.cpp:23:23:23:28 | call to source | globals.cpp:19:10:19:24 | flowTestGlobal2 | +| lambdas.cpp:8:10:8:15 | call to source | lambdas.cpp:14:8:14:8 | t | +| lambdas.cpp:8:10:8:15 | call to source | lambdas.cpp:18:8:18:8 | call to operator() | +| lambdas.cpp:8:10:8:15 | call to source | lambdas.cpp:21:8:21:8 | t | +| lambdas.cpp:8:10:8:15 | call to source | lambdas.cpp:29:8:29:8 | t | +| lambdas.cpp:8:10:8:15 | call to source | lambdas.cpp:35:8:35:8 | a | +| lambdas.cpp:8:10:8:15 | call to source | lambdas.cpp:41:8:41:8 | a | +| lambdas.cpp:43:7:43:12 | call to source | lambdas.cpp:46:7:46:7 | w | +| ref.cpp:29:11:29:16 | call to source | ref.cpp:62:10:62:11 | x3 | +| ref.cpp:53:9:53:10 | definition of x1 | ref.cpp:56:10:56:11 | x1 | +| ref.cpp:53:13:53:14 | definition of x2 | ref.cpp:59:10:59:11 | x2 | +| ref.cpp:53:17:53:18 | definition of x3 | ref.cpp:62:10:62:11 | x3 | +| ref.cpp:53:21:53:22 | definition of x4 | ref.cpp:65:10:65:11 | x4 | +| ref.cpp:55:23:55:28 | call to source | ref.cpp:56:10:56:11 | x1 | +| ref.cpp:94:15:94:20 | call to source | ref.cpp:129:13:129:15 | val | +| ref.cpp:109:15:109:20 | call to source | ref.cpp:132:13:132:15 | val | +| ref.cpp:122:23:122:28 | call to source | ref.cpp:123:13:123:15 | val | +| ref.cpp:125:19:125:24 | call to source | ref.cpp:126:13:126:15 | val | +| self-Iterator.cpp:19:23:19:30 | call to source | self-Iterator.cpp:20:10:20:10 | x | +| test.cpp:6:12:6:17 | call to source | test.cpp:7:8:7:9 | t1 | +| test.cpp:6:12:6:17 | call to source | test.cpp:9:8:9:9 | t1 | +| test.cpp:6:12:6:17 | call to source | test.cpp:10:8:10:9 | t2 | +| test.cpp:6:12:6:17 | call to source | test.cpp:15:8:15:9 | t2 | +| test.cpp:6:12:6:17 | call to source | test.cpp:26:8:26:9 | t1 | +| test.cpp:35:10:35:15 | call to source | test.cpp:30:8:30:8 | t | +| test.cpp:36:13:36:18 | call to source | test.cpp:31:8:31:8 | c | +| test.cpp:50:14:50:19 | call to source | test.cpp:58:10:58:10 | t | +| test.cpp:66:30:66:36 | source1 | test.cpp:71:8:71:9 | x4 | +| test.cpp:75:7:75:8 | definition of u1 | test.cpp:76:8:76:9 | u1 | +| test.cpp:83:7:83:8 | definition of u2 | test.cpp:84:8:84:18 | ... ? ... : ... | +| test.cpp:83:7:83:8 | definition of u2 | test.cpp:86:8:86:9 | i1 | +| test.cpp:89:28:89:34 | source1 indirection | test.cpp:90:8:90:14 | source1 | +| test.cpp:100:13:100:18 | call to source | test.cpp:103:10:103:12 | ref | +| test.cpp:138:27:138:32 | call to source | test.cpp:140:8:140:8 | y | +| test.cpp:151:33:151:38 | call to source | test.cpp:144:8:144:8 | s | +| test.cpp:151:33:151:38 | call to source | test.cpp:152:8:152:8 | y | +| test.cpp:164:34:164:39 | call to source | test.cpp:157:8:157:8 | x | +| test.cpp:164:34:164:39 | call to source | test.cpp:165:8:165:8 | y | +| test.cpp:171:11:171:16 | call to source | test.cpp:178:8:178:8 | y | +| test.cpp:245:14:245:19 | call to source | test.cpp:260:12:260:12 | x | +| test.cpp:265:22:265:27 | call to source | test.cpp:266:12:266:12 | x | +| test.cpp:305:17:305:22 | call to source | test.cpp:289:14:289:14 | x | +| test.cpp:314:4:314:9 | call to source | test.cpp:318:7:318:7 | x | +| test.cpp:333:17:333:22 | call to source | test.cpp:337:10:337:18 | globalVar | +| test.cpp:333:17:333:22 | call to source | test.cpp:339:10:339:18 | globalVar | +| test.cpp:333:17:333:22 | call to source | test.cpp:343:10:343:18 | globalVar | +| test.cpp:333:17:333:22 | call to source | test.cpp:349:10:349:18 | globalVar | +| test.cpp:347:17:347:22 | call to source | test.cpp:337:10:337:18 | globalVar | +| test.cpp:347:17:347:22 | call to source | test.cpp:339:10:339:18 | globalVar | +| test.cpp:347:17:347:22 | call to source | test.cpp:343:10:343:18 | globalVar | +| test.cpp:347:17:347:22 | call to source | test.cpp:349:10:349:18 | globalVar | +| test.cpp:359:13:359:18 | call to source | test.cpp:365:10:365:14 | field | +| test.cpp:373:13:373:18 | call to source | test.cpp:369:10:369:14 | field | +| test.cpp:373:13:373:18 | call to source | test.cpp:375:10:375:14 | field | +| test.cpp:382:48:382:54 | source1 | test.cpp:385:8:385:10 | tmp | +| test.cpp:388:53:388:59 | source1 | test.cpp:392:8:392:10 | tmp | +| test.cpp:388:53:388:59 | source1 | test.cpp:394:10:394:12 | tmp | +| test.cpp:399:7:399:9 | definition of tmp | test.cpp:401:8:401:10 | tmp | +| test.cpp:405:7:405:9 | definition of tmp | test.cpp:408:8:408:10 | tmp | +| test.cpp:416:7:416:11 | definition of local | test.cpp:418:8:418:12 | local | +| test.cpp:417:16:417:20 | intRefSource output argument | test.cpp:418:8:418:12 | local | +| test.cpp:422:7:422:11 | definition of local | test.cpp:424:8:424:12 | local | +| test.cpp:423:20:423:25 | intPointerSource output argument | test.cpp:424:8:424:12 | local | +| test.cpp:433:7:433:11 | definition of local | test.cpp:435:8:435:12 | local | +| test.cpp:434:20:434:24 | intPointerSource output argument | test.cpp:436:8:436:13 | * ... | +| test.cpp:440:7:440:11 | definition of local | test.cpp:442:8:442:12 | local | +| test.cpp:441:18:441:23 | intArraySource output argument | test.cpp:442:8:442:12 | local | +| test.cpp:448:7:448:11 | definition of local | test.cpp:450:8:450:12 | local | +| test.cpp:449:18:449:22 | intArraySource output argument | test.cpp:451:8:451:13 | * ... | +| test.cpp:456:26:456:32 | source1 | test.cpp:457:9:457:22 | (statement expression) | +| test.cpp:456:26:456:32 | source1 | test.cpp:468:8:468:12 | local | +| test.cpp:472:8:472:13 | call to source | test.cpp:478:8:478:8 | x | +| test.cpp:506:8:506:13 | call to source | test.cpp:513:8:513:8 | x | +| test.cpp:519:19:519:24 | call to source | test.cpp:521:8:521:20 | access to array | +| test.cpp:531:29:531:34 | call to source | test.cpp:532:8:532:9 | * ... | +| test.cpp:547:9:547:9 | definition of x | test.cpp:536:10:536:11 | * ... | +| test.cpp:551:9:551:9 | definition of y | test.cpp:541:10:541:10 | y | +| test.cpp:562:17:562:31 | call to indirect_source indirection | test.cpp:566:10:566:19 | * ... | +| test.cpp:562:17:562:31 | call to indirect_source indirection | test.cpp:568:10:568:19 | * ... | +| test.cpp:562:17:562:31 | call to indirect_source indirection | test.cpp:572:10:572:19 | * ... | +| test.cpp:562:17:562:31 | call to indirect_source indirection | test.cpp:578:10:578:19 | * ... | +| test.cpp:576:17:576:31 | call to indirect_source indirection | test.cpp:566:10:566:19 | * ... | +| test.cpp:576:17:576:31 | call to indirect_source indirection | test.cpp:568:10:568:19 | * ... | +| test.cpp:576:17:576:31 | call to indirect_source indirection | test.cpp:572:10:572:19 | * ... | +| test.cpp:576:17:576:31 | call to indirect_source indirection | test.cpp:578:10:578:19 | * ... | +| test.cpp:594:12:594:26 | call to indirect_source indirection | test.cpp:597:8:597:13 | * ... | +| test.cpp:601:20:601:20 | intPointerSource output argument | test.cpp:603:8:603:9 | * ... | +| test.cpp:607:20:607:20 | intPointerSource output argument | test.cpp:609:8:609:9 | * ... | +| test.cpp:614:20:614:20 | intPointerSource output argument | test.cpp:616:8:616:17 | * ... | +| test.cpp:628:20:628:25 | intPointerSource output argument | test.cpp:629:17:629:22 | buffer indirection | +| test.cpp:633:18:633:23 | call to source | test.cpp:634:8:634:8 | x | +| test.cpp:646:7:646:12 | call to source | test.cpp:645:8:645:8 | x | +| test.cpp:660:7:660:12 | call to source | test.cpp:658:8:658:8 | x | +| test.cpp:664:18:664:23 | call to source | test.cpp:666:8:666:16 | * ... | +| test.cpp:681:7:681:12 | call to source | test.cpp:679:8:679:16 | * ... | +| test.cpp:733:7:733:7 | definition of x | test.cpp:735:8:735:8 | x | +| test.cpp:751:27:751:32 | call to source | test.cpp:740:10:740:10 | x | +| test.cpp:753:32:753:37 | call to source | test.cpp:740:10:740:10 | x | +| test.cpp:755:32:755:37 | call to source | test.cpp:740:10:740:10 | x | +| test.cpp:771:27:771:32 | call to source | test.cpp:760:10:760:10 | x | +| test.cpp:773:32:773:37 | call to source | test.cpp:760:10:760:10 | x | +| test.cpp:775:32:775:37 | call to source | test.cpp:760:10:760:10 | x | +| test.cpp:788:31:788:36 | call to source | test.cpp:782:12:782:12 | x | +| test.cpp:790:31:790:36 | call to source | test.cpp:782:12:782:12 | x | +| test.cpp:797:22:797:28 | intPointerSource output argument | test.cpp:798:19:798:25 | content indirection | +| test.cpp:808:25:808:39 | call to indirect_source indirection | test.cpp:813:19:813:35 | * ... indirection | +| test.cpp:818:26:818:31 | call to source | test.cpp:823:10:823:27 | * ... | +| test.cpp:832:21:832:26 | call to source | test.cpp:836:10:836:22 | global_direct | +| test.cpp:842:11:842:16 | call to source | test.cpp:844:8:844:8 | y | +| test.cpp:846:13:846:27 | call to indirect_source indirection | test.cpp:848:17:848:25 | rpx indirection | +| test.cpp:853:55:853:62 | call to source | test.cpp:854:10:854:36 | * ... | +| test.cpp:860:54:860:59 | call to source | test.cpp:861:10:861:37 | static_local_pointer_dynamic | +| test.cpp:872:46:872:51 | call to source | test.cpp:875:10:875:31 | global_pointer_dynamic | +| test.cpp:880:64:880:83 | indirect_source(1) indirection | test.cpp:883:10:883:45 | static_local_array_static_indirect_1 | +| test.cpp:881:64:881:83 | indirect_source(2) indirection | test.cpp:886:19:886:54 | static_local_array_static_indirect_2 indirection | +| test.cpp:890:54:890:61 | source | test.cpp:893:10:893:36 | static_local_pointer_static | +| test.cpp:891:65:891:84 | indirect_source(1) indirection | test.cpp:895:19:895:56 | static_local_pointer_static_indirect_1 indirection | +| test.cpp:901:56:901:75 | indirect_source(1) indirection | test.cpp:907:10:907:39 | global_array_static_indirect_1 | +| test.cpp:902:56:902:75 | indirect_source(2) indirection | test.cpp:911:19:911:48 | global_array_static_indirect_2 indirection | +| test.cpp:914:46:914:53 | source | test.cpp:919:10:919:30 | global_pointer_static | +| test.cpp:915:57:915:76 | indirect_source(1) indirection | test.cpp:921:19:921:50 | global_pointer_static_indirect_1 indirection | +| true_upon_entry.cpp:9:11:9:16 | call to source | true_upon_entry.cpp:13:8:13:8 | x | +| true_upon_entry.cpp:17:11:17:16 | call to source | true_upon_entry.cpp:21:8:21:8 | x | +| true_upon_entry.cpp:27:9:27:14 | call to source | true_upon_entry.cpp:29:8:29:8 | x | +| true_upon_entry.cpp:33:11:33:16 | call to source | true_upon_entry.cpp:39:8:39:8 | x | +| true_upon_entry.cpp:43:11:43:16 | call to source | true_upon_entry.cpp:49:8:49:8 | x | +| true_upon_entry.cpp:54:11:54:16 | call to source | true_upon_entry.cpp:57:8:57:8 | x | +| true_upon_entry.cpp:62:11:62:16 | call to source | true_upon_entry.cpp:66:8:66:8 | x | +| true_upon_entry.cpp:70:11:70:16 | call to source | true_upon_entry.cpp:78:8:78:8 | x | +| true_upon_entry.cpp:83:11:83:16 | call to source | true_upon_entry.cpp:86:8:86:8 | x | +| true_upon_entry.cpp:98:11:98:16 | call to source | true_upon_entry.cpp:105:8:105:8 | x | diff --git a/cpp/ql/test/library-tests/dataflow/dataflow-tests/test.cpp b/cpp/ql/test/library-tests/dataflow/dataflow-tests/test.cpp index 73c9fd28b933..029405aedca7 100644 --- a/cpp/ql/test/library-tests/dataflow/dataflow-tests/test.cpp +++ b/cpp/ql/test/library-tests/dataflow/dataflow-tests/test.cpp @@ -1,5 +1,5 @@ int source(); -void sink(int); void sink(const int *); void sink(int **); void indirect_sink(...); +void sink(...); void indirect_sink(...); void intraprocedural_with_local_flow() { int t2; @@ -836,4 +836,90 @@ namespace MoreGlobalTests { sink(global_direct); // $ ir MISSING: ast indirect_sink(global_direct); // clean } +} + +void test_references() { + int x = source(); + int &y = x; + sink(y); // $ ast,ir + + int* px = indirect_source(); + int*& rpx = px; + indirect_sink((int*)rpx); // $ ast,ir +} + +namespace GlobalArrays { + void test1() { + static const int static_local_array_dynamic[] = { ::source() }; + sink(*static_local_array_dynamic); // $ ir MISSING: ast + } + + const int* source(bool); + + void test2() { + static const int* static_local_pointer_dynamic = source(true); + sink(static_local_pointer_dynamic); // $ ast,ir + } + + static const int global_array_dynamic[] = { ::source() }; + + void test3() { + sink(*global_array_dynamic); // $ MISSING: ir,ast // Missing in IR because no 'IRFunction' for global_array is generated for some reason. + } + + const int* source(bool); + + static const int* global_pointer_dynamic = source(true); + + void test4() { + sink(global_pointer_dynamic); // $ ir MISSING: ast + } + + void test5() { + static const char static_local_array_static[] = "source"; + static const char static_local_array_static_indirect_1[] = "indirect_source(1)"; + static const char static_local_array_static_indirect_2[] = "indirect_source(2)"; + sink(static_local_array_static); // clean + sink(static_local_array_static_indirect_1); // $ ir MISSING: ast + indirect_sink(static_local_array_static_indirect_1); // clean + sink(static_local_array_static_indirect_2); // clean + indirect_sink(static_local_array_static_indirect_2); // $ ir MISSING: ast + } + + void test6() { + static const char* static_local_pointer_static = "source"; + static const char* static_local_pointer_static_indirect_1 = "indirect_source(1)"; + static const char* static_local_pointer_static_indirect_2 = "indirect_source(2)"; + sink(static_local_pointer_static); // $ ir MISSING: ast + sink(static_local_pointer_static_indirect_1); // clean + indirect_sink(static_local_pointer_static_indirect_1); // $ ir MISSING: ast + sink(static_local_pointer_static_indirect_2); // clean: static_local_pointer_static_indirect_2 does not have 2 indirections + indirect_sink(static_local_pointer_static_indirect_2); // clean: static_local_pointer_static_indirect_2 does not have 2 indirections + } + + static const char global_array_static[] = "source"; + static const char global_array_static_indirect_1[] = "indirect_source(1)"; + static const char global_array_static_indirect_2[] = "indirect_source(2)"; + + void test7() { + sink(global_array_static); // clean + sink(*global_array_static); // clean + sink(global_array_static_indirect_1); // $ ir MISSING: ast + sink(*global_array_static_indirect_1); // clean + indirect_sink(global_array_static); // clean + indirect_sink(global_array_static_indirect_1); // clean + indirect_sink(global_array_static_indirect_2); // $ ir MISSING: ast + } + + static const char* global_pointer_static = "source"; + static const char* global_pointer_static_indirect_1 = "indirect_source(1)"; + static const char* global_pointer_static_indirect_2 = "indirect_source(2)"; + + void test8() { + sink(global_pointer_static); // $ ir MISSING: ast + sink(global_pointer_static_indirect_1); // clean + indirect_sink(global_pointer_static_indirect_1); // $ ir MISSING: ast + sink(global_pointer_static_indirect_2); // clean: global_pointer_static_indirect_2 does not have 2 indirections + indirect_sink(global_pointer_static_indirect_2); // clean: global_pointer_static_indirect_2 does not have 2 indirections + } } \ No newline at end of file From f61a7373a1e9e84d516366234770954f61a3ac58 Mon Sep 17 00:00:00 2001 From: Mathias Vorreiter Pedersen Date: Thu, 16 Nov 2023 16:27:53 +0000 Subject: [PATCH 08/10] C++: Add more global-variable flow tests. --- .../dataflow/dataflow-tests/TestBase.qll | 111 ++++++++++++++++++ .../dataflow-consistency.expected | 6 + .../dataflow-tests/test-source-sink.expected | 2 - .../dataflow/dataflow-tests/test.cpp | 4 +- 4 files changed, 119 insertions(+), 4 deletions(-) create mode 100644 cpp/ql/test/library-tests/dataflow/dataflow-tests/TestBase.qll diff --git a/cpp/ql/test/library-tests/dataflow/dataflow-tests/TestBase.qll b/cpp/ql/test/library-tests/dataflow/dataflow-tests/TestBase.qll new file mode 100644 index 000000000000..528e7ca6ad33 --- /dev/null +++ b/cpp/ql/test/library-tests/dataflow/dataflow-tests/TestBase.qll @@ -0,0 +1,111 @@ +module AstTest { + import semmle.code.cpp.dataflow.DataFlow + private import semmle.code.cpp.controlflow.Guards + + /** + * A `BarrierGuard` that stops flow to all occurrences of `x` within statement + * S in `if (guarded(x)) S`. + */ + // This is tested in `BarrierGuard.cpp`. + predicate testBarrierGuard(GuardCondition g, Expr checked, boolean isTrue) { + g.(FunctionCall).getTarget().getName() = "guarded" and + checked = g.(FunctionCall).getArgument(0) and + isTrue = true + } + + /** Common data flow configuration to be used by tests. */ + module AstTestAllocationConfig implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { + source.asExpr().(FunctionCall).getTarget().getName() = "source" + or + source.asParameter().getName().matches("source%") + or + source.asExpr().(FunctionCall).getTarget().getName() = "indirect_source" + or + source.(DataFlow::DefinitionByReferenceNode).getParameter().getName().matches("ref_source%") + or + // Track uninitialized variables + exists(source.asUninitialized()) + } + + predicate isSink(DataFlow::Node sink) { + exists(FunctionCall call | + call.getTarget().getName() = ["sink", "indirect_sink"] and + sink.asExpr() = call.getAnArgument() + ) + } + + predicate isBarrier(DataFlow::Node barrier) { + barrier.asExpr().(VariableAccess).getTarget().hasName("barrier") or + barrier = DataFlow::BarrierGuard::getABarrierNode() + } + } + + module AstFlow = DataFlow::Global; +} + +module IRTest { + private import cpp + import semmle.code.cpp.ir.dataflow.DataFlow + private import semmle.code.cpp.ir.IR + private import semmle.code.cpp.controlflow.IRGuards + + /** + * A `BarrierGuard` that stops flow to all occurrences of `x` within statement + * S in `if (guarded(x)) S`. + */ + // This is tested in `BarrierGuard.cpp`. + predicate testBarrierGuard(IRGuardCondition g, Expr checked, boolean isTrue) { + exists(Call call | + call = g.getUnconvertedResultExpression() and + call.getTarget().hasName("guarded") and + checked = call.getArgument(0) and + isTrue = true + ) + } + + /** Common data flow configuration to be used by tests. */ + module IRTestAllocationConfig implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { + source.asExpr().(FunctionCall).getTarget().getName() = "source" + or + source.asIndirectExpr(1).(FunctionCall).getTarget().getName() = "indirect_source" + or + source.asExpr().(StringLiteral).getValue() = "source" + or + // indirect_source(n) gives the dataflow node representing the indirect node after n dereferences. + exists(int n, string s | + n = s.regexpCapture("indirect_source\\((\\d)\\)", 1).toInt() and + source.asIndirectExpr(n).(StringLiteral).getValue() = s + ) + or + source.asParameter().getName().matches("source%") + or + source.(DataFlow::DefinitionByReferenceNode).getParameter().getName().matches("ref_source%") + or + exists(source.asUninitialized()) + } + + predicate isSink(DataFlow::Node sink) { + exists(FunctionCall call, Expr e | e = call.getAnArgument() | + call.getTarget().getName() = "sink" and + sink.asExpr() = e + or + call.getTarget().getName() = "indirect_sink" and + sink.asIndirectExpr() = e + ) + } + + predicate isBarrier(DataFlow::Node barrier) { + exists(Expr barrierExpr | barrierExpr in [barrier.asExpr(), barrier.asIndirectExpr()] | + barrierExpr.(VariableAccess).getTarget().hasName("barrier") + ) + or + barrier = DataFlow::BarrierGuard::getABarrierNode() + or + barrier = DataFlow::BarrierGuard::getAnIndirectBarrierNode() + } + } + + module IRFlow = DataFlow::Global; +} diff --git a/cpp/ql/test/library-tests/dataflow/dataflow-tests/dataflow-consistency.expected b/cpp/ql/test/library-tests/dataflow/dataflow-tests/dataflow-consistency.expected index 7d25b5f79627..7dca44a474ba 100644 --- a/cpp/ql/test/library-tests/dataflow/dataflow-tests/dataflow-consistency.expected +++ b/cpp/ql/test/library-tests/dataflow/dataflow-tests/dataflow-consistency.expected @@ -1,5 +1,11 @@ uniqueEnclosingCallable +| test.cpp:864:44:864:58 | {...} | Node should have one enclosing callable but has 0. | +| test.cpp:864:47:864:54 | call to source | Node should have one enclosing callable but has 0. | +| test.cpp:872:46:872:51 | call to source | Node should have one enclosing callable but has 0. | +| test.cpp:872:53:872:56 | 1 | Node should have one enclosing callable but has 0. | uniqueCallEnclosingCallable +| test.cpp:864:47:864:54 | call to source | Call should have one enclosing callable but has 0. | +| test.cpp:872:46:872:51 | call to source | Call should have one enclosing callable but has 0. | uniqueType uniqueNodeLocation missingLocation diff --git a/cpp/ql/test/library-tests/dataflow/dataflow-tests/test-source-sink.expected b/cpp/ql/test/library-tests/dataflow/dataflow-tests/test-source-sink.expected index c98bc68c884d..6e84e48c792e 100644 --- a/cpp/ql/test/library-tests/dataflow/dataflow-tests/test-source-sink.expected +++ b/cpp/ql/test/library-tests/dataflow/dataflow-tests/test-source-sink.expected @@ -286,11 +286,9 @@ irFlow | test.cpp:853:55:853:62 | call to source | test.cpp:854:10:854:36 | * ... | | test.cpp:860:54:860:59 | call to source | test.cpp:861:10:861:37 | static_local_pointer_dynamic | | test.cpp:872:46:872:51 | call to source | test.cpp:875:10:875:31 | global_pointer_dynamic | -| test.cpp:880:64:880:83 | indirect_source(1) indirection | test.cpp:883:10:883:45 | static_local_array_static_indirect_1 | | test.cpp:881:64:881:83 | indirect_source(2) indirection | test.cpp:886:19:886:54 | static_local_array_static_indirect_2 indirection | | test.cpp:890:54:890:61 | source | test.cpp:893:10:893:36 | static_local_pointer_static | | test.cpp:891:65:891:84 | indirect_source(1) indirection | test.cpp:895:19:895:56 | static_local_pointer_static_indirect_1 indirection | -| test.cpp:901:56:901:75 | indirect_source(1) indirection | test.cpp:907:10:907:39 | global_array_static_indirect_1 | | test.cpp:902:56:902:75 | indirect_source(2) indirection | test.cpp:911:19:911:48 | global_array_static_indirect_2 indirection | | test.cpp:914:46:914:53 | source | test.cpp:919:10:919:30 | global_pointer_static | | test.cpp:915:57:915:76 | indirect_source(1) indirection | test.cpp:921:19:921:50 | global_pointer_static_indirect_1 indirection | diff --git a/cpp/ql/test/library-tests/dataflow/dataflow-tests/test.cpp b/cpp/ql/test/library-tests/dataflow/dataflow-tests/test.cpp index 029405aedca7..a46833c1d165 100644 --- a/cpp/ql/test/library-tests/dataflow/dataflow-tests/test.cpp +++ b/cpp/ql/test/library-tests/dataflow/dataflow-tests/test.cpp @@ -880,7 +880,7 @@ namespace GlobalArrays { static const char static_local_array_static_indirect_1[] = "indirect_source(1)"; static const char static_local_array_static_indirect_2[] = "indirect_source(2)"; sink(static_local_array_static); // clean - sink(static_local_array_static_indirect_1); // $ ir MISSING: ast + sink(static_local_array_static_indirect_1); // $ MISSING: ast,ir indirect_sink(static_local_array_static_indirect_1); // clean sink(static_local_array_static_indirect_2); // clean indirect_sink(static_local_array_static_indirect_2); // $ ir MISSING: ast @@ -904,7 +904,7 @@ namespace GlobalArrays { void test7() { sink(global_array_static); // clean sink(*global_array_static); // clean - sink(global_array_static_indirect_1); // $ ir MISSING: ast + sink(global_array_static_indirect_1); // $ MISSING: ir,ast sink(*global_array_static_indirect_1); // clean indirect_sink(global_array_static); // clean indirect_sink(global_array_static_indirect_1); // clean From 3631c06f96a37d711628a0d257225b22fa8e4af4 Mon Sep 17 00:00:00 2001 From: Benjamin Rodes Date: Fri, 17 Nov 2023 12:44:39 -0500 Subject: [PATCH 09/10] Adding support to find non-constant string concat. Find str concat with non-const args separately and trace these to format specifiers. --- .../Likely Bugs/Format/NonConstantFormat.ql | 123 ++++++++++++++++-- .../Likely Bugs/Format/StrConcatenation.qll | 54 ++++++++ 2 files changed, 163 insertions(+), 14 deletions(-) create mode 100644 cpp/ql/src/Likely Bugs/Format/StrConcatenation.qll diff --git a/cpp/ql/src/Likely Bugs/Format/NonConstantFormat.ql b/cpp/ql/src/Likely Bugs/Format/NonConstantFormat.ql index b5a80bebdc55..57e08dfd41c9 100644 --- a/cpp/ql/src/Likely Bugs/Format/NonConstantFormat.ql +++ b/cpp/ql/src/Likely Bugs/Format/NonConstantFormat.ql @@ -17,7 +17,11 @@ import semmle.code.cpp.ir.dataflow.TaintTracking import semmle.code.cpp.security.FlowSources + import StrConcatenation + /** + * Functions not statically observed to be called by any other function + */ class UncalledFunction extends Function { UncalledFunction() { not exists(Call c | c.getTarget() = this) @@ -50,6 +54,16 @@ } + /** + * 1) The return of any function so long as the function does not have a definition and + * is not in the white list (see `whitelistFunction`) + * 2) A parameter to a function that is not called (see `UncalledFunction`) + * 3) An out parameter of a function so long as the function has no definition and is not the + * output of a formatting function call (see `FormattingFunctionCall`). + * Rationale: The output of a FormattingFunctionCall is a step in a data flow, not a source of + * const or non-const data. + * 4) A flow source (see `FlowSources`) + */ predicate isNonConst(DataFlow::Node node){ exists(Call fc | fc = [node.asExpr(), node.asIndirectExpr()] | not ( @@ -70,12 +84,21 @@ + /** + * `sink` is DataFlow::Node representing format string of a `FormatingFunctionCall` + * `formatSTring` is the Expr representing the format string of `sink`. + */ predicate isSinkImpl(DataFlow::Node sink, Expr formatString) { [sink.asExpr(), sink.asIndirectExpr()] = formatString and exists(FormattingFunctionCall fc | formatString = fc.getArgument(fc.getFormatParameterIndex())) } + /** + * Flow from a non-constant source to a format string of a formatting function call. + * + * Continue flow through `whitelistFunction` calls. + */ module NonConstFlowConfig implements DataFlow::ConfigSig { predicate isSource(DataFlow::Node source) { isNonConst(source) @@ -94,6 +117,21 @@ module NonConstFlow = TaintTracking::Global; + /** + * Flow from a non-constant source to a format string of a formatting function call. + * Used to find non-constant format strings by finding format variables with no + * constant source (the non-existence of any literal path to a format would indicate a + * potential vulnerability). + * NOTE: this is an approximation. If one literal flows to a format sink, then it appears + * to not be vulnerable. For example, if two paths exist to the format, one with a literal + * and one without, this approach will result in a false negative. + * Another potential for false negatives is string concatenation with a literal. + * We address these false negatives partially by combining results using the `NonConstFlow` + * trace, and also by providing specific barriers for known concatenation with literals + * (Special casing how flow should be traced through string concatenation using `LiteralToStrConcatFlow`). + * + * Continues flow through `whitelistFunction` calls. + */ module LiteralToFormatConfig implements DataFlow::ConfigSig{ predicate isSource(DataFlow::Node source) { source.asExpr() instanceof StringLiteral @@ -110,6 +148,10 @@ module LiteralToFormatConfig implements DataFlow::ConfigSig{ and n2.asIndirectExpr() = c ) } + + predicate isBarrierOut(DataFlow::Node node) { + exists(StrConcatenation sc | sc.getAnOperand() = [node.asExpr(), node.asIndirectExpr()]) + } } module LiteralToFormatTrace = TaintTracking::Global; @@ -128,18 +170,66 @@ predicate isNonLiteralfmt(Expr fmt){ not isLiteralFormat(fmt) } - - -// from FormattingFunctionCall call, Expr formatString -// where -// call.getArgument(call.getFormatParameterIndex()) = formatString and -// exists(DataFlow::Node sink | -// NonConstFlow::flowTo(sink) and -// isSinkImpl(sink, formatString) -// ) -// select formatString, -// "The format string argument to " + call.getTarget().getName() + -// " should be constant to prevent security issues and other potential errors." + +module LiteralToStrConcatConfig implements DataFlow::ConfigSig{ + predicate isSource(DataFlow::Node source) { + source.asExpr() instanceof Literal + } + + predicate isSink(DataFlow::Node sink) { + exists(StrConcatenation sc | sc.getAnOperand() = [sink.asExpr(), sink.asIndirectExpr()]) + } + + predicate isAdditionalFlowStep(DataFlow::Node n1, DataFlow::Node n2){ + exists(Call c, int ind | + whitelistFunction(c.getTarget(), ind) + and c.getArgument(ind) = [n1.asExpr(), n1.asIndirectExpr()] + and n2.asIndirectExpr() = c + ) + } +} + +module LiteralToStrConcatFlow = TaintTracking::Global; + +predicate isLiteralStrConcatArg(StrConcatenation cat, Expr arg){ + exists(DataFlow::Node src, DataFlow::Node sink | + LiteralToStrConcatFlow::flow(src,sink) + and + arg = [sink.asExpr(), sink.asIndirectExpr()] + and + cat.getAnOperand() = arg + ) +} + +predicate isNonLiteralStrConcatArg(StrConcatenation cat, Expr arg){ + cat.getAnOperand() = arg + and + not isLiteralStrConcatArg(cat, arg) +} + + +module NonConstStrCatToFormatConfig implements DataFlow::ConfigSig{ + predicate isSource(DataFlow::Node source) { + exists(StrConcatenation sc | + isNonLiteralStrConcatArg(sc, _) + and + sc.getResultExpr() = [source.asExpr(), source.asIndirectExpr()]) + } + + predicate isSink(DataFlow::Node sink) { + isSinkImpl(sink, _) + } + + predicate isAdditionalFlowStep(DataFlow::Node n1, DataFlow::Node n2){ + exists(Call c, int ind | + whitelistFunction(c.getTarget(), ind) + and c.getArgument(ind) = [n1.asExpr(), n1.asIndirectExpr()] + and n2.asIndirectExpr() = c + ) + } +} +module NonConstStrCatToFormatFlow = TaintTracking::Global; + from FormattingFunctionCall call, Expr formatString where @@ -147,10 +237,15 @@ where ( isNonLiteralfmt(formatString) or - exists(DataFlow::Node sink | + exists(DataFlow::Node sink | NonConstFlow::flowTo(sink) and isSinkImpl(sink, formatString) - ) + ) + or + exists(DataFlow::Node sink | + NonConstStrCatToFormatFlow::flowTo(sink) and + isSinkImpl(sink, formatString) + ) ) select formatString, "The format string argument to " + call.getTarget().getName() + diff --git a/cpp/ql/src/Likely Bugs/Format/StrConcatenation.qll b/cpp/ql/src/Likely Bugs/Format/StrConcatenation.qll new file mode 100644 index 000000000000..7e10fd47b976 --- /dev/null +++ b/cpp/ql/src/Likely Bugs/Format/StrConcatenation.qll @@ -0,0 +1,54 @@ +import cpp +import semmle.code.cpp.models.implementations.Strcat +import semmle.code.cpp.models.interfaces.FormattingFunction + +class StrConcatenation extends Call{ + StrConcatenation(){ + exists(FormattingFunctionCall fc | + this = fc + ) + or + exists(StrcatFunction f | this.getTarget() = f) or + exists(Call call, Operator op | + call.getTarget() = op and + op.hasQualifiedName(["std", "bsl"], "operator+") and + op.getType().(UserType).hasQualifiedName(["std", "bsl"], "basic_string") and + this = call + ) or + this.getTarget().hasQualifiedName(["std", "bsl"], "operator<<") + } + + /** + * Gets the operands of this concatenation. + * Will not return out param for sprintf-like functions, but will consider the format string + * to be part of the operands. + */ + Expr getAnOperand(){ + result = this.getAnArgument() + and not result instanceof Call // I believe this addresses odd behavior with overloaded operators + and + ( + result.getUnderlyingType().stripType().getName() = "char" + or + result.getUnderlyingType().getName() = "string" + or + result.getType().getUnspecifiedType().(UserType).hasQualifiedName(["std", "bsl"], "basic_string") + ) + and + (this instanceof FormattingFunctionCall implies + ( + result = this.(FormattingFunctionCall).getFormat() or + exists(int n | result = this.getArgument(n) and + n >= this.(FormattingFunctionCall).getTarget().(FormattingFunction).getFirstFormatArgumentIndex()) + ) + ) + } + + Expr getResultExpr(){ + if this instanceof FormattingFunctionCall + then + result = this.(FormattingFunctionCall).getOutputArgument(_) + else + result = this.(Call) + } +} \ No newline at end of file From 0eb2022e2cd9430601a0695637717b85f3a6399e Mon Sep 17 00:00:00 2001 From: Benjamin Rodes Date: Fri, 17 Nov 2023 11:41:22 -0800 Subject: [PATCH 10/10] Reducing the scope of the prior isNonConst predicate. --- .../Likely Bugs/Format/NonConstantFormat.ql | 45 ++++++++++++------- 1 file changed, 30 insertions(+), 15 deletions(-) diff --git a/cpp/ql/src/Likely Bugs/Format/NonConstantFormat.ql b/cpp/ql/src/Likely Bugs/Format/NonConstantFormat.ql index 57e08dfd41c9..1d84da36355e 100644 --- a/cpp/ql/src/Likely Bugs/Format/NonConstantFormat.ql +++ b/cpp/ql/src/Likely Bugs/Format/NonConstantFormat.ql @@ -17,6 +17,7 @@ import semmle.code.cpp.ir.dataflow.TaintTracking import semmle.code.cpp.security.FlowSources + import semmle.code.cpp.models.implementations.Strcpy import StrConcatenation /** @@ -59,26 +60,31 @@ * is not in the white list (see `whitelistFunction`) * 2) A parameter to a function that is not called (see `UncalledFunction`) * 3) An out parameter of a function so long as the function has no definition and is not the - * output of a formatting function call (see `FormattingFunctionCall`). - * Rationale: The output of a FormattingFunctionCall is a step in a data flow, not a source of + * output of a formatting function call (see `FormattingFunctionCall`) or a call to a strcpy variant + * (see `StrcpyFunction`). + * Rationale: The output of a FormattingFunctionCall and StrcpyFunction is a step in a data flow, not a source of * const or non-const data. * 4) A flow source (see `FlowSources`) */ predicate isNonConst(DataFlow::Node node){ - exists(Call fc | fc = [node.asExpr(), node.asIndirectExpr()] | - not ( - whitelistFunction(fc.getTarget(), _) or - fc.getTarget().hasDefinition() - ) - ) - or + // exists(Call fc | fc = [node.asExpr(), node.asIndirectExpr()] | + // not ( + // whitelistFunction(fc.getTarget(), _) or + // fc.getTarget().hasDefinition() + // ) + // ) + // or exists(UncalledFunction f | f.getAParameter() = node.asParameter()) - or - ( - node instanceof DataFlow::DefinitionByReferenceNode and - not exists(FormattingFunctionCall fc | node.asDefiningArgument() = fc.getOutputArgument(_)) and - not exists(Call c | c.getAnArgument() = node.asDefiningArgument() and c.getTarget().hasDefinition()) - ) + // or + // ( + // node instanceof DataFlow::DefinitionByReferenceNode and + // not exists(FormattingFunctionCall fc | node.asDefiningArgument() = fc.getOutputArgument(_)) and + // not exists(Call c | c.getAnArgument() = node.asDefiningArgument() and c.getTarget().hasDefinition()) and + // not exists(StrcpyFunction fc, Call c | c.(Call).getTarget() = fc | + // node.asDefiningArgument() = c.getArgument(fc.getParamDest()) + // or + // [node.asExpr(), node.asIndirectExpr()] = c) + // ) or node instanceof FlowSource } @@ -247,7 +253,16 @@ where isSinkImpl(sink, formatString) ) ) + select formatString, "The format string argument to " + call.getTarget().getName() + " should be constant to prevent security issues and other potential errors." + +// import NonConstFlow::PathGraph +// from NonConstFlow::PathNode source, NonConstFlow::PathNode sink +// where +// NonConstFlow::flowPath(source, sink) and +// exists(Expr formatString| isSinkImpl(sink.getNode(), formatString) +// ) +// select sink, source, sink, "TEST" \ No newline at end of file