Skip to content

Commit

Permalink
support for file context
Browse files Browse the repository at this point in the history
  • Loading branch information
juli1 committed Mar 5, 2024
1 parent 97d161b commit fe5b098
Show file tree
Hide file tree
Showing 5 changed files with 256 additions and 4 deletions.
1 change: 1 addition & 0 deletions crates/static-analysis-kernel/src/analysis.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
pub mod analyze;
pub mod file_context;
pub mod javascript;
pub mod tree_sitter;
62 changes: 62 additions & 0 deletions crates/static-analysis-kernel/src/analysis/analyze.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use crate::analysis::file_context::get_file_context;
use crate::analysis::javascript::execute_rule;
use crate::analysis::tree_sitter::{get_query_nodes, get_tree};
use crate::model::analysis::{AnalysisOptions, LinesToIgnore};
Expand Down Expand Up @@ -103,6 +104,7 @@ where
vec![]
},
|tree| {
let file_context = get_file_context(&tree, &code.to_string());
rules
.into_iter()
.map(|rule| {
Expand Down Expand Up @@ -135,6 +137,7 @@ where
nodes,
filename.to_string(),
analysis_option.clone(),
&file_context,
);

// filter violations that have been ignored
Expand Down Expand Up @@ -537,6 +540,65 @@ def foo(arg1):
);
}

/// End-to-end check that the Go file context is exposed to rules as `node.context`:
/// the rule below only reports a violation when `"math/rand"` is among the imported
/// packages, and it logs the package list so the test can inspect it in the output.
#[test]
fn test_go_file_context() {
    // JavaScript rule: logs the imported packages and flags the captured function
    // when the file imports "math/rand".
    let rule_code = r#"
function visit(node, filename, code) {
const n = node.captures["func"];
console.log(node.context.packages);
if(node.context.packages.includes("math/rand")) {
const error = buildError(n.start.line, n.start.col, n.end.line, n.end.col, "invalid name", "CRITICAL", "security");
addError(error);
}
}
"#;

    // Tree-sitter query capturing every function declaration as `func`.
    let query = r#"(function_declaration) @func"#;

    // Go source importing "math/rand" plus "crypto/rand" under two aliases.
    let code = r#"
import (
"math/rand"
crand1 "crypto/rand"
crand2 "crypto/rand"
)
func main () {
}
"#;

    let go_rule = RuleInternal {
        name: "myrule".to_string(),
        short_description: Some("short desc".to_string()),
        description: Some("description".to_string()),
        category: RuleCategory::CodeStyle,
        severity: RuleSeverity::Notice,
        language: Language::Go,
        code: rule_code.to_string(),
        tree_sitter_query: get_query(query, &Language::Go).unwrap(),
        variables: HashMap::new(),
    };

    // `log_output` must be on: the assertions below read what the rule printed.
    let options = AnalysisOptions {
        log_output: true,
        use_debug: false,
    };

    let rules = vec![go_rule];
    let results = analyze(&Language::Go, &rules, "myfile.go", code, &options);

    // One rule in, one rule result out.
    assert_eq!(1, results.len());
    let rule_result = &results[0];
    let logged = rule_result.output.clone().unwrap();

    // The single `main` function is reported because "math/rand" is imported.
    assert_eq!(rule_result.violations.len(), 1);
    // Both package paths must show up in the logged context.
    assert!(logged.contains("\"math/rand\""));
    assert!(logged.contains("\"crypto/rand\""));
}

#[test]
fn test_get_lines_to_ignore_javascript() {
// no-dd-sa ruleset1/rule1 on line 3 so we ignore line 4 for ruleset1/rule1
Expand Down
167 changes: 167 additions & 0 deletions crates/static-analysis-kernel/src/analysis/file_context.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
use crate::analysis::tree_sitter::get_tree_sitter_language;
use crate::model::common::Language;
use derive_builder::Builder;
use serde::Serialize;
use std::collections::{HashMap, HashSet};
use tree_sitter::{Query, QueryCursor, Tree};

/// Structure for the file context that is specific to Go.
///
/// This is the value that `FileContext::to_json_string` serializes to JSON when the
/// language of the file is Go.
#[derive(Default, Serialize, Builder, Debug, Clone)]
pub struct FileContextGo {
/// Paths of the imported packages. As populated by `get_file_context_go`, the paths
/// carry no surrounding quotes and each path is listed once, even if it is imported
/// several times under different names.
pub packages: Vec<String>,
/// Maps the name a package is imported under to the package path. As populated by
/// `get_file_context_go`, the key is the explicit import name when there is one, and
/// otherwise the last `/`-separated segment of the path.
pub packages_aliased: HashMap<String, String>,
}

/// Contains all the context for all languages. When the context has to be serialized as a
/// string for rule execution, the language-specific structure is what gets serialized.
#[derive(Builder, Debug)]
pub struct FileContext {
/// Go-specific context. `get_file_context` sets it for Go trees; the empty context
/// returned by `get_empty_file_context` leaves it as `None`.
pub file_context_go: Option<FileContextGo>,
/// Language this context was built for; `None` in the empty context.
pub language: Option<Language>,
}

impl FileContext {
    /// Returns the JSON object injected into the JavaScript of the rule being executed.
    ///
    /// Only the language-specific part of the context is serialized: for Go, that is the
    /// [`FileContextGo`] structure.
    ///
    /// # Returns
    /// The serialized language-specific context, or the empty object `{}` when
    /// - the language is unknown or has no dedicated context,
    /// - the language is Go but no Go context is attached, or
    /// - serialization fails (the fallback documented here used to be a panic).
    pub fn to_json_string(&self) -> String {
        match (&self.language, &self.file_context_go) {
            (Some(Language::Go), Some(file_context_go)) => {
                // Honour the "return {} if we cannot generate a valid context" contract
                // instead of unwinding in the middle of an analysis.
                serde_json::to_string(file_context_go).unwrap_or_else(|_| "{}".to_string())
            }
            _ => "{}".to_string(),
        }
    }
}

/// Builds the Go-specific file context from an already-parsed source file.
///
/// Runs a tree-sitter query over every `import_spec` node of `tree` and collects:
/// - `packages`: each distinct imported package path, without its surrounding quotes,
///   sorted so that the result (and its JSON serialization) is deterministic;
/// - `packages_aliased`: a map from the name a package is imported under to its path.
///   An explicit import name is used as the key when present; otherwise the last
///   `/`-separated segment of the path is used (`"math/rand"` is registered as `rand`).
///
/// # Arguments
/// * `tree` - tree-sitter tree obtained by parsing `code`. The query uses Go node kinds,
///   so the tree is expected to be a Go tree.
/// * `code` - the source text `tree` was parsed from.
///
/// # Panics
/// Panics if the import query cannot be compiled for the language of `tree`.
pub fn get_file_context_go(tree: &Tree, code: &String) -> FileContextGo {
    let mut packages_list: HashSet<String> = HashSet::new();
    let mut packages_aliased: HashMap<String, String> = HashMap::new();

    // Query to get all the packages and their potential aliases. The first capture (index 0)
    // is the optional alias, the second capture (index 1) is the quoted package path.
    let query_string = r#"
(import_spec
name: (_)? @name
path: (_) @package
)
"#;
    let query = Query::new(&tree.language(), query_string)
        .expect("the Go import query is a constant and must compile for a Go tree");

    let mut query_cursor = QueryCursor::new();
    let query_result = query_cursor.matches(&query, tree.root_node(), code.as_bytes());
    for query_match in query_result {
        let mut package_name: Option<&str> = None;
        let mut package_alias: Option<&str> = None;

        for capture in query_match.captures {
            // tree-sitter positions are BYTE offsets. Slicing the source by bytes keeps the
            // extraction correct when multi-byte characters precede the import (counting
            // `chars()` would drift) and avoids rescanning the file for every capture.
            // `get` returns `None` rather than panicking on an out-of-range or
            // non-boundary range, in which case the capture is ignored.
            let text = match code.get(capture.node.byte_range()) {
                Some(text) => text,
                None => continue,
            };

            match capture.index {
                0 => package_alias = Some(text),
                // The path node includes its delimiters (`"..."` or backquotes); drop the
                // first and last byte. A node too short to hold both delimiters is skipped
                // instead of underflowing.
                1 => {
                    package_name = text
                        .len()
                        .checked_sub(1)
                        .and_then(|last| text.get(1..last));
                }
                _ => {}
            }
        }

        // Without a package path there is nothing to record for this import.
        let pkg = match package_name {
            Some(pkg) => pkg,
            None => continue,
        };

        // always add the package to the list
        packages_list.insert(pkg.to_string());

        // If we have the alias, use it. If we only have the package name, use its last path
        // segment as the alias so that we have a simple mapping between the name used in the
        // code and the fully qualified package.
        let alias = package_alias.unwrap_or_else(|| pkg.rsplit('/').next().unwrap_or(pkg));
        packages_aliased.insert(alias.to_string(), pkg.to_string());
    }

    // A HashSet iterates in arbitrary order; sort so callers always see the same list.
    let mut packages: Vec<String> = packages_list.into_iter().collect();
    packages.sort_unstable();

    FileContextGo {
        packages,
        packages_aliased,
    }
}

/// Returns a context that carries no information: no language and no language-specific
/// data. Used when there is no dedicated context for the language of the file.
pub fn get_empty_file_context() -> FileContext {
    let language = None;
    let file_context_go = None;

    FileContext {
        file_context_go,
        language,
    }
}

/// Builds the [`FileContext`] that matches the language of `tree`.
///
/// Only Go has a dedicated context: when `tree` was parsed with the Go grammar, the
/// Go context is computed from `tree` and `code`. For every other language the empty
/// context is returned.
pub fn get_file_context(tree: &Tree, code: &String) -> FileContext {
    let is_go_tree = tree.language().to_owned() == get_tree_sitter_language(&Language::Go);
    if !is_go_tree {
        return get_empty_file_context();
    }

    let go_context = get_file_context_go(tree, code);
    FileContext {
        file_context_go: Some(go_context),
        language: Some(Language::Go),
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::analysis::tree_sitter::get_tree;
    use crate::model::common::Language;

    /// Three import specs (one unnamed, two aliases of the same package) must yield
    /// two distinct packages and three alias entries.
    #[test]
    fn test_get_file_context_go() {
        let source = r#"
import (
"math/rand"
crand1 "crypto/rand"
crand2 "crypto/rand"
)
"#;

        let parsed = get_tree(source, &Language::Go).unwrap();
        let context = get_file_context(&parsed, &source.to_string());

        // A Go tree must produce a Go-specific context.
        assert!(context.file_context_go.is_some());
        let go_context = context.file_context_go.unwrap();

        // "crypto/rand" is imported twice but is a single package.
        assert_eq!(go_context.packages.len(), 2);
        // rand, crand1 and crand2.
        assert_eq!(go_context.packages_aliased.len(), 3);

        let aliases = &go_context.packages_aliased;
        assert_eq!(aliases.get("crand1").unwrap(), "crypto/rand");
        // The unnamed import is registered under the last segment of its path.
        assert_eq!(aliases.get("rand").unwrap(), "math/rand");
    }
}
Loading

0 comments on commit fe5b098

Please sign in to comment.