Skip to content

Commit

Permalink
Switch to implementation using a universal read-only ContentSet
Browse files Browse the repository at this point in the history
  • Loading branch information
smowton committed Oct 18, 2024
1 parent 63ff0cb commit fe808f7
Show file tree
Hide file tree
Showing 6 changed files with 135 additions and 112 deletions.
113 changes: 33 additions & 80 deletions go/ql/lib/semmle/go/dataflow/internal/DataFlowPrivate.qll
Original file line number Diff line number Diff line change
Expand Up @@ -143,102 +143,55 @@ predicate jumpStep(Node n1, Node n2) {
* Thus, `node2` references an object with a content `x` that contains the
* value of `node1`.
*/
predicate storeStep(Node node1, ContentSet c, Node node2) {
// a write `(*p).f = rhs` is modeled as two store steps: `rhs` is flows into field `f` of `(*p)`,
// which in turn flows into the pointer content of `p`
exists(Write w, Field f, DataFlow::Node base, DataFlow::Node rhs | w.writesField(base, f, rhs) |
node1 = rhs and
node2.(PostUpdateNode).getPreUpdateNode() = base and
c = any(DataFlow::FieldContent fc | fc.getField() = f)
predicate storeStep(Node node1, ContentSet cs, Node node2) {
exists(Content c | cs.asOneContent() = c |
// a write `(*p).f = rhs` is modeled as two store steps: `rhs` is flows into field `f` of `(*p)`,
// which in turn flows into the pointer content of `p`
exists(Write w, Field f, DataFlow::Node base, DataFlow::Node rhs | w.writesField(base, f, rhs) |
node1 = rhs and
node2.(PostUpdateNode).getPreUpdateNode() = base and
c = any(DataFlow::FieldContent fc | fc.getField() = f)
or
node1 = base and
node2.(PostUpdateNode).getPreUpdateNode() = node1.(PointerDereferenceNode).getOperand() and
c = any(DataFlow::PointerContent pc | pc.getPointerType() = node2.getType())
)
or
node1 = base and
node2.(PostUpdateNode).getPreUpdateNode() = node1.(PointerDereferenceNode).getOperand() and
node1 = node2.(AddressOperationNode).getOperand() and
c = any(DataFlow::PointerContent pc | pc.getPointerType() = node2.getType())
or
containerStoreStep(node1, node2, c)
)
or
node1 = node2.(AddressOperationNode).getOperand() and
c = any(DataFlow::PointerContent pc | pc.getPointerType() = node2.getType())
or
FlowSummaryImpl::Private::Steps::summaryStoreStep(node1.(FlowSummaryNode).getSummaryNode(), c,
FlowSummaryImpl::Private::Steps::summaryStoreStep(node1.(FlowSummaryNode).getSummaryNode(), cs,
node2.(FlowSummaryNode).getSummaryNode())
or
containerStoreStep(node1, node2, c)
}

/**
* Gets a `DataFlow::ContentSet` containing a single `Content` appropriate
* for reading a field, element, map value or channel message of type `containerType`.
*/
DataFlow::ContentSet getContentForType(Type containerType) {
containerType instanceof ArrayType and
result instanceof DataFlow::ArrayContent
or
containerType instanceof SliceType and
result instanceof DataFlow::ArrayContent
or
containerType instanceof ChanType and
result instanceof DataFlow::CollectionContent
or
containerType instanceof MapType and
result instanceof DataFlow::MapValueContent
or
result.(DataFlow::PointerContent).getPointerType() = containerType
or
exists(Field f | f = containerType.(StructType).getField(_) |
result.(DataFlow::FieldContent).getField() = f
)
}

/**
* Gets the type of an array/slice element, channel value, map value,
* pointer base type or named-type underlying type relating to `containerType`.
*/
Type getElementType(Type containerType) {
result = containerType.(ArrayType).getElementType() or
result = containerType.(SliceType).getElementType() or
result = containerType.(ChanType).getElementType() or
result = containerType.(MapType).getValueType() or
result = containerType.(PointerType).getPointerType() or
result = containerType.(NamedType).getUnderlyingType()
}

/**
* Gets the type of an array/slice element, channel value, map value,
* pointer base type, named-type underlying type or struct field type
* relating to `containerType`.
*/
Type getAnElementOrFieldType(Type containerType) {
result = getElementType(containerType) or
result = containerType.(StructType).getField(_).getType()
}

/**
* Holds if data can flow from `node1` to `node2` via a read of `c`.
* Thus, `node1` references an object with a content `c` whose value ends up in
* `node2`.
*/
predicate readStep(Node node1, ContentSet c, Node node2) {
node1 = node2.(PointerDereferenceNode).getOperand() and
c = any(DataFlow::PointerContent pc | pc.getPointerType() = node1.getType())
or
exists(FieldReadNode read |
node2 = read and
node1 = read.getBase() and
c = any(DataFlow::FieldContent fc | fc.getField() = read.getField())
predicate readStep(Node node1, ContentSet cs, Node node2) {
exists(Content c | cs.asOneContent() = c |
node1 = node2.(PointerDereferenceNode).getOperand() and
c = any(DataFlow::PointerContent pc | pc.getPointerType() = node1.getType())
or
exists(FieldReadNode read |
node2 = read and
node1 = read.getBase() and
c = any(DataFlow::FieldContent fc | fc.getField() = read.getField())
)
or
containerReadStep(node1, node2, c)
)
or
FlowSummaryImpl::Private::Steps::summaryReadStep(node1.(FlowSummaryNode).getSummaryNode(), c,
FlowSummaryImpl::Private::Steps::summaryReadStep(node1.(FlowSummaryNode).getSummaryNode(), cs,
node2.(FlowSummaryNode).getSummaryNode())
or
containerReadStep(node1, node2, c)
or
exists(Type containerType |
any(ImplicitFieldReadNode ifrn).shouldImplicitlyReadAllFields(node1) and
getAnElementOrFieldType*(node1.getType()) = containerType
|
c = getContentForType(containerType) and
node1 = node2
)
any(ImplicitFieldReadNode ifrn).shouldImplicitlyReadAllFields(node1) and
cs.isUniversalContent() and
node1 = node2
}

/**
Expand Down
54 changes: 49 additions & 5 deletions go/ql/lib/semmle/go/dataflow/internal/DataFlowUtil.qll
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,13 @@ class Content extends TContent {
) {
filepath = "" and startline = 0 and startcolumn = 0 and endline = 0 and endcolumn = 0
}

/**
* Gets the `ContentSet` contaning only this content.
*/
ContentSet asContentSet() {
result.asOneContent() = this
}
}

/** A reference through a field. */
Expand Down Expand Up @@ -249,21 +256,35 @@ class SyntheticFieldContent extends Content, TSyntheticFieldContent {
override string toString() { result = s.toString() }
}

private newtype TContentSet =
TOneContent(Content c) or
TAllContent()

/**
* An entity that represents a set of `Content`s.
*
* The set may be interpreted differently depending on whether it is
* stored into (`getAStoreContent`) or read from (`getAReadContent`).
*/
class ContentSet instanceof Content {
class ContentSet instanceof TContentSet {
/** Gets a content that may be stored into when storing into this set. */
Content getAStoreContent() { result = this }
Content getAStoreContent() {
this = TOneContent(result)
}

/** Gets a content that may be read from when reading from this set. */
Content getAReadContent() { result = this }
Content getAReadContent() {
this = TOneContent(result)
or
this = TAllContent() and result = any(Content c)

Check warning

Code scanning / CodeQL

Redundant cast Warning

Redundant cast to
Content
.
}

/** Gets a textual representation of this content set. */
string toString() { result = super.toString() }
string toString() {
exists(Content c | this = TOneContent(c) | result = c.toString())
or
this = TAllContent() and result = "all content"
}

/**
* Holds if this element is at the specified location.
Expand All @@ -275,7 +296,30 @@ class ContentSet instanceof Content {
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
super.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
exists(Content c | this = TOneContent(c) |
c.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
)
or
this = TAllContent() and
filepath = "" and
startline = 0 and
startcolumn = 0 and
endline = 0 and
endcolumn = 0
}

/**
* If this is a singleton content set, returns the content.
*/
Content asOneContent() {
this = TOneContent(result)
}

/**
* Holds if this is a universal content set.
*/
predicate isUniversalContent() {
this = TAllContent()
}
}

Expand Down
42 changes: 22 additions & 20 deletions go/ql/lib/semmle/go/dataflow/internal/FlowSummaryImpl.qll
Original file line number Diff line number Diff line change
Expand Up @@ -55,26 +55,28 @@ module Input implements InputSig<Location, DataFlowImplSpecific::GoDataFlow> {
}

string encodeContent(ContentSet cs, string arg) {
exists(Field f, string package, string className, string fieldName |
f = cs.(FieldContent).getField() and
f.hasQualifiedName(package, className, fieldName) and
result = "Field" and
arg = package + "." + className + "." + fieldName
)
or
exists(SyntheticField f |
f = cs.(SyntheticFieldContent).getField() and result = "SyntheticField" and arg = f
exists(Content c | cs.asOneContent() = c |
exists(Field f, string package, string className, string fieldName |
f = c.(FieldContent).getField() and
f.hasQualifiedName(package, className, fieldName) and
result = "Field" and
arg = package + "." + className + "." + fieldName
)
or
exists(SyntheticField f |
f = c.(SyntheticFieldContent).getField() and result = "SyntheticField" and arg = f
)
or
c instanceof ArrayContent and result = "ArrayElement" and arg = ""
or
c instanceof CollectionContent and result = "Element" and arg = ""
or
c instanceof MapKeyContent and result = "MapKey" and arg = ""
or
c instanceof MapValueContent and result = "MapValue" and arg = ""
or
c instanceof PointerContent and result = "Dereference" and arg = ""
)
or
cs instanceof ArrayContent and result = "ArrayElement" and arg = ""
or
cs instanceof CollectionContent and result = "Element" and arg = ""
or
cs instanceof MapKeyContent and result = "MapKey" and arg = ""
or
cs instanceof MapValueContent and result = "MapValue" and arg = ""
or
cs instanceof PointerContent and result = "Dereference" and arg = ""
}

bindingset[token]
Expand Down Expand Up @@ -339,7 +341,7 @@ module Private {
SummaryComponent qualifier() { result = argument(-1) }

/** Gets a summary component for field `f`. */
SummaryComponent field(Field f) { result = content(any(FieldContent c | c.getField() = f)) }
SummaryComponent field(Field f) { result = content(any(FieldContent c | c.getField() = f).asContentSet()) }

/** Gets a summary component that represents the return value of a call. */
SummaryComponent return() { result = SC::return(_) }
Expand Down
31 changes: 26 additions & 5 deletions go/ql/lib/semmle/go/dataflow/internal/TaintTrackingUtil.qll
Original file line number Diff line number Diff line change
Expand Up @@ -34,17 +34,38 @@ predicate localTaintStep(DataFlow::Node src, DataFlow::Node sink) {
FlowSummaryImpl::Private::Steps::summaryThroughStepTaint(src, sink, _)
}

private Type getElementType(Type containerType) {
result = containerType.(ArrayType).getElementType() or
result = containerType.(SliceType).getElementType() or
result = containerType.(ChanType).getElementType() or
result = containerType.(MapType).getValueType() or
result = containerType.(PointerType).getPointerType()
}

/**
* Holds if default `TaintTracking::Configuration`s should allow implicit reads
* of `c` at sinks and inputs to additional taint steps.
*/
bindingset[node]
predicate defaultImplicitTaintRead(DataFlow::Node node, DataFlow::ContentSet c) {
exists(Type containerType |
predicate defaultImplicitTaintRead(DataFlow::Node node, DataFlow::ContentSet cs) {
exists(Type containerType, DataFlow::Content c |
node instanceof DataFlow::ArgumentNode and
DataFlowPrivate::getElementType*(node.getType()) = containerType
getElementType*(node.getType()) = containerType and
cs.asOneContent() = c
|
c = DataFlowPrivate::getContentForType(containerType)
containerType instanceof ArrayType and
c instanceof DataFlow::ArrayContent
or
containerType instanceof SliceType and
c instanceof DataFlow::ArrayContent
or
containerType instanceof ChanType and
c instanceof DataFlow::CollectionContent
or
containerType instanceof MapType and
c instanceof DataFlow::MapValueContent
or
c.(DataFlow::PointerContent).getPointerType() = containerType
)
}

Expand Down Expand Up @@ -122,7 +143,7 @@ predicate elementWriteStep(DataFlow::Node pred, DataFlow::Node succ) {
any(DataFlow::Write w).writesElement(succ.(DataFlow::PostUpdateNode).getPreUpdateNode(), _, pred)
or
FlowSummaryImpl::Private::Steps::summaryStoreStep(pred.(DataFlowPrivate::FlowSummaryNode)
.getSummaryNode(), any(DataFlow::Content c | c instanceof DataFlow::ArrayContent),
.getSummaryNode(), any(DataFlow::ArrayContent ac).asContentSet(),
succ.(DataFlowPrivate::FlowSummaryNode).getSummaryNode())
}

Expand Down
2 changes: 1 addition & 1 deletion go/ql/lib/semmle/go/frameworks/stdlib/NetHttp.qll
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ module NetHttp {
|
lastParamIndex = call.getCall().getCalleeType().getNumParameter() - 1 and
varArgsSliceArgument = SummaryComponentStack::argument(lastParamIndex) and
arrayContentSC = SummaryComponent::content(arrayContent) and
arrayContentSC = SummaryComponent::content(arrayContent.asContentSet()) and
stack = SummaryComponentStack::push(arrayContentSC, varArgsSliceArgument)
)
else stack = SummaryComponentStack::argument(n)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,10 @@ func test() {
buff1.Read(bytes1)
sink(bytes1) // $ hasTaintFlow=1 hasTaintFlow=2 hasTaintFlow=3

tmpl.Execute(buff2, toSerialize)
// Read `buff2` via an `any`-typed variable, to ensure the static type of the argument to tmpl.Execute makes no difference to the result
var toSerializeAsAny any
toSerializeAsAny = toSerialize
tmpl.Execute(buff2, toSerializeAsAny)
buff2.Read(bytes2)
sink(bytes2) // $ hasTaintFlow=1 hasTaintFlow=2 hasTaintFlow=3

Expand Down

0 comments on commit fe808f7

Please sign in to comment.