Skip to content

Commit

Permalink
[dataflowengineoss] composable semantics (#4974)
Browse files Browse the repository at this point in the history
  • Loading branch information
xavierpinho authored Sep 29, 2024
1 parent f141437 commit f5dda21
Show file tree
Hide file tree
Showing 2 changed files with 216 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2,24 +2,88 @@ package io.joern.dataflowengineoss.semanticsloader

import io.shiftleft.codepropertygraph.generated.Cpg
import io.shiftleft.codepropertygraph.generated.nodes.Method
import io.shiftleft.semanticcpg.language.*

/** A source of per-method [[FlowSemantic]]s that can be layered on top of other `Semantics` via `after`. */
trait Semantics {

/** Hook for `Semantics` that benefit from having some kind of internal state tailored to the current CPG.
*/
def initialize(cpg: Cpg): Unit
// NOTE(review): `initialize` is declared twice here (abstract above, empty default body below). This looks like the
// before/after pair of a rendered diff — confirm that only the defaulted (no-op) version is meant to remain.
def initialize(cpg: Cpg): Unit = {}

/** Looks up the flow semantic for `method`; `None` signals that this `Semantics` has nothing to say about it. */
def forMethod(method: Method): Option[FlowSemantic]

/** Builds a new `Semantics` whose `forMethod` first looks in `other` and, only if that lookup fails (i.e. returns
* `None`), falls back to the current one. Implemented by delegating to `Semantics.compose(this, other)`.
*/
def after(other: Semantics): Semantics = Semantics.compose(this, other)
}

/** The empty Semantics */
object NoSemantics extends Semantics {
object Semantics {

override def initialize(cpg: Cpg): Unit = {}
/** Combines two `Semantics` into one in which `second` takes precedence over `first`.
  *
  * Initialization is forwarded to `second` and then to `first`. A `forMethod` lookup asks `second` and consults
  * `first` only when `second` yields `None`.
  */
private def compose(first: Semantics, second: Semantics): Semantics = new Semantics {

  override def initialize(cpg: Cpg): Unit =
    // Order matters: `second` is initialized before `first`.
    List(second, first).foreach(_.initialize(cpg))

  override def forMethod(method: Method): Option[FlowSemantic] =
    second.forMethod(method) match {
      case hit @ Some(_) => hit
      case None          => first.forMethod(method)
    }
}
}

/** The empty Semantics: `forMethod` never finds anything, which makes it the identity under `Semantics.after`. */
object NoSemantics extends Semantics {

  override def forMethod(method: Method): Option[FlowSemantic] = Option.empty[FlowSemantic]
}

/** The nil Semantics: wherever it applies, `forMethod` succeeds with a `FlowSemantic` that has no mappings. */
object NilSemantics {

  /** The single shape every builder below produces: a semantic for `fullName` with an empty mapping list. */
  private def nil(fullName: String, regex: Boolean = false): FlowSemantic =
    FlowSemantic(fullName, List.empty, regex)

  /** Builds a nil semantics that applies to every method. Beware this is right-absorbing under `Semantics.after`. */
  def apply(): Semantics = new Semantics {
    override def forMethod(method: Method): Option[FlowSemantic] = Some(nil(method.fullName))
  }

  /** Builds a nil semantics from an explicit list of method full names (optionally flagged as regexes). */
  def where(methodFullNames: List[String], regex: Boolean = false): Semantics = {
    val nilSemantics = methodFullNames.map(name => nil(name, regex))
    FullNameSemantics.fromList(nilSemantics)
  }

  /** Builds a nil semantics that applies exactly to the methods accepted by `predicate`. */
  def where(predicate: Method => Boolean): Semantics = new Semantics {
    override def forMethod(method: Method): Option[FlowSemantic] =
      Some(method).filter(predicate).map(accepted => nil(accepted.fullName))
  }
}

/** Semantics whose mappings are exactly `FlowMapping(0, 0)` followed by `PassThroughMapping`. */
object NoCrossTaintSemantics {

  /** The single shape every builder below produces for `fullName`: 0->0 plus `PassThroughMapping`. */
  private def noCrossTaint(fullName: String, regex: Boolean = false): FlowSemantic =
    FlowSemantic(fullName, List(FlowMapping(0, 0), PassThroughMapping), regex)

  /** Builds a no-cross-taint semantics that applies to every method. Beware this is right-absorbing under
    * `Semantics.after`.
    */
  def apply(): Semantics = new Semantics {
    override def forMethod(method: Method): Option[FlowSemantic] = Some(noCrossTaint(method.fullName))
  }

  /** Builds a no-cross-taint semantics from an explicit list of method full names (optionally flagged as regexes). */
  def where(methodFullNames: List[String], regex: Boolean = false): Semantics = {
    val semantics = methodFullNames.map(name => noCrossTaint(name, regex))
    FullNameSemantics.fromList(semantics)
  }

  /** Builds a no-cross-taint semantics that applies exactly to the methods accepted by `predicate`. */
  def where(predicate: Method => Boolean): Semantics = new Semantics {
    override def forMethod(method: Method): Option[FlowSemantic] =
      Some(method).filter(predicate).map(accepted => noCrossTaint(accepted.fullName))
  }
}

case class FlowSemantic(methodFullName: String, mappings: List[FlowPath] = List.empty, regex: Boolean = false)

object FlowSemantic {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,14 @@ package io.joern.pysrc2cpg.dataflow

import io.joern.dataflowengineoss.DefaultSemantics
import io.joern.dataflowengineoss.language.toExtendedCfgNode
import io.joern.dataflowengineoss.semanticsloader.{FlowMapping, FlowSemantic, PassThroughMapping}
import io.joern.dataflowengineoss.semanticsloader.{
FlowMapping,
FlowSemantic,
NilSemantics,
NoCrossTaintSemantics,
NoSemantics,
PassThroughMapping
}
import io.joern.pysrc2cpg.PySrc2CpgFixture
import io.shiftleft.codepropertygraph.generated.Cpg
import io.shiftleft.codepropertygraph.generated.nodes.{Literal, Member, Method}
Expand Down Expand Up @@ -64,7 +71,7 @@ class DataFlowTests extends PySrc2CpgFixture(withOssDataflow = true) {
|a = 20
|print(foo(a))
|""".stripMargin)
.withSemantics(DefaultSemantics().plus(List(FlowSemantic("helpers.py:<module>.foo", List()))))
.withSemantics(DefaultSemantics().after(NilSemantics.where(List("helpers.py:<module>.foo"))))
val source = cpg.literal("20").l
val sink = cpg.call("print").argument(1).l
val flows = sink.reachableByFlows(source).l
Expand Down Expand Up @@ -102,7 +109,7 @@ class DataFlowTests extends PySrc2CpgFixture(withOssDataflow = true) {
|from helpers import foo
|print(foo(20))
|""".stripMargin)
.withSemantics(DefaultSemantics().plus(List(FlowSemantic("helpers.py:<module>.foo", List()))))
.withSemantics(DefaultSemantics().after(NilSemantics.where(List("helpers.py:<module>.foo"))))
val source = cpg.literal("20").l
val sink = cpg.call("print").argument(1).l
val flows = sink.reachableByFlows(source).l
Expand Down Expand Up @@ -141,7 +148,7 @@ class DataFlowTests extends PySrc2CpgFixture(withOssDataflow = true) {
|a = 20
|print(foo(a))
|""".stripMargin)
.withSemantics(DefaultSemantics().plus(List(FlowSemantic("Test0.py:<module>.foo", List()))))
.withSemantics(DefaultSemantics().after(NilSemantics.where(List("Test0.py:<module>.foo"))))
val source = cpg.literal("20").l
val sink = cpg.call("print").argument(1).l
val flows = sink.reachableByFlows(source).l
Expand Down Expand Up @@ -865,6 +872,143 @@ class DataFlowTests extends PySrc2CpgFixture(withOssDataflow = true) {

}

/** Data-flow checks run with the fixture's semantics set to `DefaultSemantics()`.
  *
  * Both cases use the literal `1` as source and the call(s) named `baz` as sink, and pin the single expected flow.
  * The expected entries appear to be (code, line number) pairs as produced by `flowToResultPairs` — confirm against
  * the fixture.
  */
class DefaultSemanticsDataFlowTest1 extends PySrc2CpgFixture(withOssDataflow = true, semantics = DefaultSemantics()) {

// `a` is only passed to `bar.foo` as keyword argument `Z`, yet the expected flow reaches `bar.baz(b)`.
"DefaultSemantics cross-taints arguments to external method calls" in {
val cpg = code("""
|import bar
|a = 1
|bar.foo(b, Z=a)
|bar.baz(b)
|""".stripMargin)
val source = cpg.literal("1")
val sink = cpg.call("baz")
sink.reachableByFlows(source).map(flowToResultPairs).l shouldBe List(
List(("a = 1", 3), ("bar.foo(b, Z = a)", 4), ("bar.baz(b)", 5))
)
}

// `y` flows into `bar.foo(y)`, whose result is assigned to `x` and then handed to `bar.baz`.
"DefaultSemantics taints external method call return values" in {
val cpg = code("""
|import bar
|y = 1
|x = bar.foo(y)
|bar.baz(x)
|""".stripMargin)
val source = cpg.literal("1")
val sink = cpg.call("baz")
sink.reachableByFlows(source).map(flowToResultPairs).l shouldBe List(
List(("y = 1", 3), ("bar.foo(y)", 4), ("x = bar.foo(y)", 4), ("bar.baz(x)", 5))
)
}

}

/** Data-flow checks run with the fixture's semantics set to `NoSemantics`, whose `forMethod` always returns `None`.
  *
  * Both cases use the literal `1` as source and the call(s) named `baz` as sink, and pin the single expected flow.
  */
class NoSemanticsDataFlowTest1 extends PySrc2CpgFixture(withOssDataflow = true, semantics = NoSemantics) {

// `a` is only passed to `bar.foo` as keyword argument `Z`, yet the expected flow reaches `bar.baz(b)`.
"NoSemantics cross-taints arguments to external method calls" in {
val cpg = code("""
|import bar
|a = 1
|bar.foo(b, Z=a)
|bar.baz(b)
|""".stripMargin)
val source = cpg.literal("1")
val sink = cpg.call("baz")
sink.reachableByFlows(source).map(flowToResultPairs).l shouldBe List(
List(("a = 1", 3), ("bar.foo(b, Z = a)", 4), ("bar.baz(b)", 5))
)
}

// `y` flows into `bar.foo(y)`, whose result is assigned to `x` and then handed to `bar.baz`.
"NoSemantics taints external method call return values" in {
val cpg = code("""
|import bar
|y = 1
|x = bar.foo(y)
|bar.baz(x)
|""".stripMargin)
val source = cpg.literal("1")
val sink = cpg.call("baz")
sink.reachableByFlows(source).map(flowToResultPairs).l shouldBe List(
List(("y = 1", 3), ("bar.foo(y)", 4), ("x = bar.foo(y)", 4), ("bar.baz(x)", 5))
)
}
}

/** Data-flow checks run with `NilSemantics().after(DefaultSemantics())`.
  *
  * Per `Semantics.after`, lookups go to `DefaultSemantics()` first and fall back to the universal nil semantics
  * (a `FlowSemantic` with no mappings) only when that returns `None`. Both cases expect no flow from the literal `1`
  * to the `baz` call.
  */
class NilSemanticsDataFlowTest1
extends PySrc2CpgFixture(withOssDataflow = true, semantics = NilSemantics().after(DefaultSemantics())) {

// Passing `a` as keyword argument `Z` of `bar.foo` must not make `b` reachable from `a`.
"NilSemantics does not cross-taint arguments to external method calls" in {
val cpg = code("""
|import bar
|a = 1
|bar.foo(b, Z=a)
|bar.baz(b)
|""".stripMargin)
val source = cpg.literal("1")
val sink = cpg.call("baz")
sink.reachableByFlows(source).map(flowToResultPairs) shouldBe empty
}

// The value returned by `bar.foo(y)` must not be reachable from `y`.
"NilSemantics does not taint external method call return values" in {
val cpg = code("""
|import bar
|y = 1
|x = bar.foo(y)
|bar.baz(x)
|""".stripMargin)
val source = cpg.literal("1")
val sink = cpg.call("baz")
sink.reachableByFlows(source).map(flowToResultPairs).l shouldBe List()
}
}

/** Data-flow check run with a predicate-based `NoCrossTaintSemantics` layered after `DefaultSemantics()`.
  *
  * Per `Semantics.after`, `DefaultSemantics()` is consulted first; methods it does not cover and whose full name
  * contains `"bar.py"` get the no-cross-taint mappings (`FlowMapping(0, 0)` and `PassThroughMapping`).
  */
class NoCrossTaintDataFlowTest1
extends PySrc2CpgFixture(
withOssDataflow = true,
semantics = NoCrossTaintSemantics.where(_.fullName.contains("bar.py")).after(DefaultSemantics())
) {

// The sink is narrowed to the argument at index 1 of `baz`, i.e. `b`; no flow from the literal `1` is expected.
"NoCrossTaintSemantics prevents cross-tainting arguments to external method calls" in {
val cpg = code("""
|import bar
|a = 1
|bar.foo(b, Z=a)
|bar.baz(b)
|""".stripMargin)
val source = cpg.literal("1")
val sink = cpg.call("baz").argument.argumentIndex(1)
sink.reachableByFlows(source).map(flowToResultPairs) shouldBe empty
}
}

/** Data-flow check run with `NoCrossTaintSemantics` restricted to methods whose full name contains `"foo"`, layered
  * after `DefaultSemantics()`.
  *
  * Per `Semantics.after`, `DefaultSemantics()` is consulted first and the no-cross-taint semantics is only a fallback.
  */
class NoCrossTaintDataFlowTest2
extends PySrc2CpgFixture(
withOssDataflow = true,
semantics = NoCrossTaintSemantics.where(_.fullName.contains("foo")).after(DefaultSemantics())
) {

// Sinks are the arguments at index 1 of every `print` call; only `print(c)` is expected to be reached.
"NoCrossTaintSemantics works for specific external method call" in {
val cpg = code("""
|import bar
|a = 1
|bar.foo(a,b) # foo has no-cross-taint semantics, so b is not tainted by a
|bar.baz(a,c) # however, baz has default semantics, so c is tainted by a
|print(b)
|print(c)
|""".stripMargin)
val source = cpg.literal("1")
val sink = cpg.call.name("print").argument.argumentIndex(1)
// Note: it's unfortunate that `(bar.foo(a, b), 4)` still shows up in this flow.
// However, we can check that NoCrossTaintSemantics is doing its job, as otherwise
// we'd also have a `print(b)` sink.
sink.reachableByFlows(source).map(flowToResultPairs).l shouldBe List(
List(("a = 1", 3), ("bar.foo(a, b)", 4), ("bar.baz(a, c)", 5), ("print(c)", 7))
)
}

}

class RegexDefinedFlowsDataFlowTests
extends PySrc2CpgFixture(
withOssDataflow = true,
Expand Down

0 comments on commit f5dda21

Please sign in to comment.