Skip to content

Commit

Permalink
Merge pull request #535 from DataDog/jf/STAL-2792
Browse files Browse the repository at this point in the history
[STAL-2792] Add CLI option to export data flow graphs
  • Loading branch information
jasonforal authored Oct 25, 2024
2 parents 06d4d39 + 0da499e commit 3228c65
Show file tree
Hide file tree
Showing 11 changed files with 967 additions and 356 deletions.
21 changes: 19 additions & 2 deletions crates/bins/src/bin/datadog-static-analyzer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,11 @@ use cli::utils::{choose_cpu_count, get_num_threads_to_use, print_configuration};
use cli::violations_table;
use common::analysis_options::AnalysisOptions;
use common::model::diff_aware::DiffAware;
use kernel::analysis::analyze::analyze;
use kernel::analysis::analyze::{analyze, generate_flow_graph_dot};
use kernel::analysis::generated_content::DEFAULT_IGNORED_GLOBS;
use kernel::constants::{CARGO_VERSION, VERSION};
use kernel::model::analysis::ERROR_RULE_TIMEOUT;
use kernel::model::common::OutputFormat;
use kernel::model::common::{Language, OutputFormat};
use kernel::model::config_file::{ConfigFile, ConfigMethod, PathConfig};
use kernel::model::rule::{Rule, RuleInternal, RuleResult, RuleSeverity};
use kernel::rule_config::RuleConfigProvider;
Expand Down Expand Up @@ -78,6 +78,7 @@ fn main() -> Result<()> {
"/path/to/rules.json",
);
opts.optopt("d", "debug", "use debug mode", "yes/no");
opts.optflag("", "debug-export-java-dfa", "export Java flow graphs by writing a `{filename}.dot` file next to each Java file scanned; this dirties the working directory");
opts.optopt("f", "format", "format of the output file", "json/sarif/csv");
opts.optopt("o", "output", "output file name", "output.json");
opts.optflag(
Expand Down Expand Up @@ -186,6 +187,8 @@ fn main() -> Result<()> {
.opt_str("d")
.map(|value| value == "yes" || value == "true")
.get_or_insert(env::var_os("DD_SA_DEBUG").is_some());
let debug_java_dfa = matches.opt_present("debug-export-java-dfa");

let output_file = matches
.opt_str("o")
.context("output file must be specified")?;
Expand Down Expand Up @@ -560,6 +563,20 @@ fn main() -> Result<()> {

should_retain
});

// Debug-only export (`--debug-export-java-dfa`): write the file's flow graph as a DOT file
// whose path is the scanned file's path with its extension replaced by `dot`.
if debug_java_dfa && *language == Language::Java {
    if let Some(graph) = generate_flow_graph_dot(
        *language,
        &relative_path,
        &file_content,
        &rule_config,
        &analysis_options,
    ) {
        let dot_path = path.with_extension("dot");
        // The export is best-effort and must never abort the scan, but a failed write is
        // reported so that a missing `.dot` file is not a silent surprise.
        if let Err(err) = fs::write(&dot_path, graph) {
            eprintln!(
                "error when writing flow graph to {}: {}",
                dot_path.display(),
                err
            );
        }
    }
}

results
} else {
eprintln!("error when getting content of path {}", &path.display());
Expand Down
169 changes: 167 additions & 2 deletions crates/static-analysis-kernel/src/analysis/analyze.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
use crate::analysis::ddsa_lib::common::DDSAJsRuntimeError;
use crate::analysis::ddsa_lib::js::flow::java::{ClassGraph, FileGraph};
use crate::analysis::ddsa_lib::runtime::ExecutionResult;
use crate::analysis::ddsa_lib::JsRuntime;
use crate::analysis::generated_content::{is_generated_file, is_minified_file};
use crate::analysis::tree_sitter::get_tree;
use crate::analysis::tree_sitter::{get_tree, get_tree_sitter_language, TSQuery};
use crate::model::analysis::{
FileIgnoreBehavior, LinesToIgnore, ERROR_RULE_EXECUTION, ERROR_RULE_TIMEOUT,
};
use crate::model::common::Language;
use crate::model::rule::{RuleInternal, RuleResult};
use crate::model::rule::{RuleCategory, RuleInternal, RuleResult, RuleSeverity};
use crate::rule_config::RuleConfig;
use common::analysis_options::AnalysisOptions;
use std::borrow::Borrow;
Expand Down Expand Up @@ -294,6 +295,109 @@ where
.collect()
}

/// Builds a [DOT Language] graph that models taint flow within a Java file.
///
/// An internal JavaScript rule is run against every `class_declaration` that is a direct child
/// of the program node. For each `method_declaration` found directly in the class body, the rule
/// serializes a flow graph to DOT; the per-method graphs are then re-parsed and grouped by class
/// into a single graph for the file.
///
/// # Returns
/// * `Some(dot)` with the serialized graph for the whole file.
/// * `None` if `language` is not [`Language::Java`], if the tree-sitter query cannot be built,
///   or if the rule reported nothing (it only reports classes that contain at least one method
///   declaration).
///
/// Method graphs that fail to re-parse as DOT are skipped.
///
/// # Panics
/// Panics if `analyze` returns no result for the single rule passed to it.
///
/// This is an expensive, unoptimized function.
///
/// [DOT Language]: https://graphviz.org/doc/info/lang.html
pub fn generate_flow_graph_dot(
    language: Language,
    file_name: &Arc<str>,
    file_contents: &Arc<str>,
    rule_config: &RuleConfig,
    analysis_option: &AnalysisOptions,
) -> Option<String> {
    // Only Java is supported; bail out before doing any work for other languages.
    if !matches!(language, Language::Java) {
        return None;
    }

    // language=javascript
    let visit_js = r#"
function visit(captures) {
const classNode = captures.get("class");
if (classNode?.cstType !== "class_declaration") {
return;
}
const classChildren = ddsa.getChildren(classNode);
const className = classChildren.find((n) => n.fieldName === "name");
const classBody = classChildren.find((n) => n.fieldName === "body");
const bodyChildren = ddsa.getChildren(classBody);
const graphs = [];
for (const bodyChild of bodyChildren) {
if (bodyChild.cstType === "method_declaration") {
const graph = __ddsaPrivate__.generateJavaFlowGraph(bodyChild);
// Create a method signature:
const methodChildren = ddsa.getChildren(bodyChild);
const type = (methodChildren.find((n) => n.fieldName === "type"))?.text ?? "";
const name = (methodChildren.find((n) => n.fieldName === "name"))?.text ?? "";
const params = (methodChildren.find((n) => n.fieldName === "parameters"))?.text ?? "";
const methodSig = `${type} ${name}${params}`
graphs.push(__ddsaPrivate__.graphToDOT(graph, methodSig));
}
}
if (graphs.length === 0) {
return;
}
// HACK: Pass structured string data back by repurposing fields of a "Violation":
// Violation.description -> class name
// Violation.fixes[i].description -> Serialized DOT graph for individual method
const violation = Violation.new(className.text, classNode);
for (const dotGraph of graphs) {
violation.addFix(Fix.new(dotGraph, []));
}
addError(violation);
}
"#;
    let class_query = "\
(program (class_declaration) @class)
";

    let tree_sitter_query =
        TSQuery::try_new(&get_tree_sitter_language(&language), class_query).ok()?;
    // A throwaway rule whose only purpose is to carry the JavaScript above through `analyze`.
    let debug_rule = RuleInternal {
        name: "<java-debug>/dataflow-dot".to_string(),
        short_description: None,
        description: None,
        category: RuleCategory::Unknown,
        severity: RuleSeverity::None,
        language,
        code: visit_js.to_string(),
        tree_sitter_query,
    };

    let rule_results = analyze(
        &language,
        [debug_rule],
        file_name,
        file_contents,
        rule_config,
        analysis_option,
    );
    let rule_result = rule_results
        .first()
        .expect("there should be exactly one result");
    if rule_result.violations.is_empty() {
        return None;
    }

    let mut file_graph = FileGraph::new(file_name.as_ref());
    for violation in &rule_result.violations {
        // The violation message carries the class name (see the HACK note in the JavaScript).
        let mut class_graph = ClassGraph::new(&violation.message);
        // Each "fix description" is an already-serialized DOT graph for one method, so it is
        // re-parsed into a `dot_structures::Graph`; graphs that fail to parse are dropped.
        let method_graphs = violation
            .fixes
            .iter()
            .filter_map(|fix| graphviz_rust::parse(&fix.description).ok());
        for method_graph in method_graphs {
            // (The JavaScript implementation already provides the method signature)
            class_graph.add_method(method_graph, None);
        }
        file_graph.add_class(class_graph);
    }
    Some(file_graph.to_dot())
}

#[cfg(test)]
mod tests {

Expand Down Expand Up @@ -1200,4 +1304,65 @@ rulesets:
RuleSeverity::Warning
);
}

// Checks that `generate_flow_graph_dot` produces, for a Java file with two classes, a graph
// containing one cluster per class and one nested cluster per method.
#[test]
fn java_taint_flow_dot_graph() {
// NOTE(review): the `line`/`col` attributes in the expected graph below depend on the exact
// whitespace of this snippet -- confirm both literals stay in sync when editing either one.
// language=java
let file_contents = "\
public class ClassA {
void echo(String a) {
someMethod(a);
}
}
public class ClassB {
void echo(String a) {
someMethod(a);
}
}
";
// Default rule configuration and analysis options are used for the run.
let parsed_dot = generate_flow_graph_dot(
Language::Java,
&Arc::from("path/to/file.java"),
&Arc::from(file_contents),
&RuleConfig::default(),
&AnalysisOptions::default(),
);
// The graph is named and labelled after the file; each class and each method signature
// becomes a `cluster: ...` subgraph.
// language=dot
let expected = r#"
strict digraph "path/to/file.java" {
label="path/to/file.java"
subgraph "cluster: ClassA" {
label=ClassA
subgraph "cluster: void echo(String a)" {
label="void echo(String a)"
"a:3:14"[text=a,line=3,col=14,cstkind=identifier,vkind=cst]
"(a):3:13"[text="(a)",line=3,col=13,cstkind=argument_list,vkind=cst]
"someMethod(a):3:3"[text="someMethod(a)",line=3,col=3,cstkind=method_invocation,vkind=cst]
"a:2:22"[text=a,line=2,col=22,cstkind=identifier,vkind=cst]
"a:3:14" -> "a:2:22" [kind=dependence]
"(a):3:13" -> "a:3:14" [kind=dependence]
"someMethod(a):3:3" -> "(a):3:13" [kind=dependence]
}
}
subgraph "cluster: ClassB" {
label=ClassB
subgraph "cluster: void echo(String a)" {
label="void echo(String a)"
"a:9:14"[text=a,line=9,col=14,cstkind=identifier,vkind=cst]
"(a):9:13"[text="(a)",line=9,col=13,cstkind=argument_list,vkind=cst]
"someMethod(a):9:3"[text="someMethod(a)",line=9,col=3,cstkind=method_invocation,vkind=cst]
"a:8:22"[text=a,line=8,col=22,cstkind=identifier,vkind=cst]
"a:9:14" -> "a:8:22" [kind=dependence]
"(a):9:13" -> "a:9:14" [kind=dependence]
"someMethod(a):9:3" -> "(a):9:13" [kind=dependence]
}
}
}
"#;
// Reparse and compare structs
// (both sides go through `graphviz_rust::parse`, so parsed structures are compared rather
// than raw DOT text).
let parsed_dot = graphviz_rust::parse(&parsed_dot.unwrap()).unwrap();
let expected_dot = graphviz_rust::parse(expected).unwrap();
assert_eq!(parsed_dot, expected_dot);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ deno_core::extension!(
ops::op_ts_node_text,
// Language-specific
ops::op_java_get_bin_expr_operator,
ops::op_digraph_adjacency_list_to_dot,
],
esm_entry_point = "ext:ddsa_lib/__bootstrap.js",
esm = [ dir "src/analysis/ddsa_lib/js", "__bootstrap.js" ],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,14 @@
"use strict";

import {CodeRegion} from "ext:ddsa_lib/region";
import {DDSA} from "ext:ddsa_lib/ddsa";
import {DDSA, DDSAPrivate} from "ext:ddsa_lib/ddsa";
import {DDSA_Console} from "ext:ddsa_lib/utility";
import {Digraph} from "ext:ddsa_lib/flow/graph";
import {FileContext} from "ext:ddsa_lib/context_file";
import {FileContextGo} from "ext:ddsa_lib/context_file_go";
import {FileContextTerraform, TerraformResource} from "ext:ddsa_lib/context_file_tf";
import {FileContextJavaScript, PackageImport} from "ext:ddsa_lib/context_file_js";
import {Fix} from "ext:ddsa_lib/fix";
import {QueryMatch} from "ext:ddsa_lib/query_match";
import {QueryMatchCompat} from "ext:ddsa_lib/query_match_compat";
import {RootContext} from "ext:ddsa_lib/context_root";
Expand All @@ -30,6 +31,7 @@ globalThis.FileContextGo = FileContextGo;
globalThis.FileContextJavaScript = FileContextJavaScript;
globalThis.PackageImport = PackageImport;
globalThis.FileContextTerraform = FileContextTerraform;
globalThis.Fix = Fix;
globalThis.TerraformResource = TerraformResource;
globalThis.QueryMatch = QueryMatch;
globalThis.QueryMatchCompat = QueryMatchCompat;
Expand All @@ -51,3 +53,5 @@ for (const [name, obj] of Object.entries(stellaCompat)) {

globalThis.console = new DDSA_Console();
globalThis.ddsa = new DDSA();
// Note: The name "private" is just used to communicate intent -- there is no enforcement preventing rules from using this.
globalThis.__ddsaPrivate__ = new DDSAPrivate();
36 changes: 35 additions & 1 deletion crates/static-analysis-kernel/src/analysis/ddsa_lib/js/ddsa.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import { MethodFlow } from "ext:ddsa_lib/flow/java";
import { SEALED_EMPTY_ARRAY } from "ext:ddsa_lib/utility";
import { TreeSitterFieldChildNode } from "ext:ddsa_lib/ts_node";

const { op_ts_node_named_children, op_ts_node_parent } = Deno.core.ops;
const { op_digraph_adjacency_list_to_dot, op_ts_node_named_children, op_ts_node_parent } = Deno.core.ops;

/**
* The main entrypoint to the ddsa JavaScript runtime's API.
Expand Down Expand Up @@ -106,3 +106,37 @@ export class DDSA {
return _findTaintFlows(transposed, vertexId(sourceNode), true);
}
}

/**
 * Private (unpublished) API surface of the ddsa JavaScript runtime.
 * Nothing here carries a stability guarantee.
 */
export class DDSAPrivate {
    /**
     * Serializes a {@link Digraph} to DOT by handing its adjacency list and the given graph
     * name to the `op_digraph_adjacency_list_to_dot` op.
     *
     * @param {Digraph} graph - The graph whose `adjacencyList` is serialized.
     * @param {string} name - The name to give the resulting DOT graph.
     *
     * @returns {string} The DOT text, or an empty string if the op returns `null`/`undefined`.
     */
    graphToDOT(graph, name) {
        const dot = op_digraph_adjacency_list_to_dot(graph.adjacencyList, name);
        return dot ?? "";
    }

    /**
     * Builds the {@link Digraph} of a method by constructing a {@link MethodFlow} for it.
     *
     * NOTE: This method assumes it is running in a Java context.
     *
     * @param {TreeSitterNode} node - The CST node to generate a graph for.
     * @returns {Digraph | undefined} The method's flow graph, or `undefined` if `node` is
     * missing or is not a "method_declaration" node.
     */
    generateJavaFlowGraph(node) {
        const isMethodDeclaration = node?.cstType === "method_declaration";
        return isMethodDeclaration ? new MethodFlow(node).graph : undefined;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@
// Copyright 2024 Datadog, Inc.

pub(crate) mod graph;
pub(crate) mod graph_test_utils;
pub(crate) mod java;
Loading

0 comments on commit 3228c65

Please sign in to comment.