From f679384a10b10f7bf430b35ceebbef270078b405 Mon Sep 17 00:00:00 2001 From: Julien Delange Date: Mon, 24 Jul 2023 14:04:45 -0400 Subject: [PATCH 1/2] support unnamed nodes --- kernel/src/analysis/analyze.rs | 70 ++++++++++++++++++++++++++++++ kernel/src/analysis/tree_sitter.rs | 15 +++++-- 2 files changed, 81 insertions(+), 4 deletions(-) diff --git a/kernel/src/analysis/analyze.rs b/kernel/src/analysis/analyze.rs index 2b25287e..cacc4583 100644 --- a/kernel/src/analysis/analyze.rs +++ b/kernel/src/analysis/analyze.rs @@ -296,6 +296,76 @@ function visit(node, filename, code) { ); } + // check that an unnamed node (here the `<=` operator) captured by the tree-sitter query + // is exposed to the rule and reported as a violation. + #[test] + fn test_capture_unnamed_nodes() { + let rule_code1 = r#" +function visit(node, filename, code) { + + const el = node.captures["less_than"]; + if(el) { + const error = buildError(el.start.line, el.start.col, el.end.line, el.end.col, + "do not use less than", "CRITICAL", "security"); + addError(error); + } +} + "#; + + let tree_sitter_query = r#" +( + (for_statement + condition: (_ + (binary_expression + left: (identifier) + operator: [ + "<" @less_than + "<=" @less_than + ">" @more_than + ">=" @more_than + ] + ) + ) + ) +) + "#; + + let js_code = r#" +for(var i = 0; i <= 10; i--){} + "#; + + let rule1 = RuleInternal { + name: "myrule".to_string(), + short_description: Some("short desc".to_string()), + description: Some("description".to_string()), + category: RuleCategory::CodeStyle, + severity: RuleSeverity::Notice, + language: Language::JavaScript, + code: rule_code1.to_string(), + tree_sitter_query: Some(tree_sitter_query.to_string()), + variables: HashMap::new(), + }; + + let analysis_options = AnalysisOptions { + log_output: true, + use_debug: false, + }; + let results = analyze( + &Language::JavaScript, + vec![rule1], + "myfile.js", + js_code, + &analysis_options, + ); + assert_eq!(1, results.len()); + let result1 = results.get(0).unwrap(); 
assert_eq!(result1.violations.len(), 1); + assert_eq!( + result1.violations.get(0).unwrap().message, + "do not use less than".to_string() + ); + } + // test showing violation ignore #[test] fn test_violation_ignore() { diff --git a/kernel/src/analysis/tree_sitter.rs b/kernel/src/analysis/tree_sitter.rs index 6ebde0e8..030d82ea 100644 --- a/kernel/src/analysis/tree_sitter.rs +++ b/kernel/src/analysis/tree_sitter.rs @@ -105,9 +105,12 @@ pub fn get_query_nodes( // // If this is NOT a named node, we do not return anything. pub fn map_node(node: tree_sitter::Node) -> Option { - fn map_node_internal(cursor: &mut tree_sitter::TreeCursor) -> Option { + fn map_node_internal( + cursor: &mut tree_sitter::TreeCursor, + only_named_node: bool, + ) -> Option { // we do not map space, parenthesis and other non-named nodes. - if !cursor.node().is_named() { + if only_named_node && !cursor.node().is_named() { return None; } @@ -115,7 +118,8 @@ pub fn map_node(node: tree_sitter::Node) -> Option { let mut children: Vec = vec![]; if cursor.goto_first_child() { loop { - let maybe_child = map_node_internal(cursor); + // For the child, we only want to capture named nodes to avoid polluting the AST. + let maybe_child = map_node_internal(cursor, true); if let Some(child) = maybe_child { children.push(child); } @@ -145,7 +149,10 @@ pub fn map_node(node: tree_sitter::Node) -> Option { } let mut ts_cursor = node.walk(); - map_node_internal(&mut ts_cursor) + + // Initially, we do not capture only named node to allow capturing unnamed node from + // the tree-sitter query. 
+ map_node_internal(&mut ts_cursor, false) } #[cfg(test)] From 927a26fec63fc8e82b8cb7196855280afaf61174 Mon Sep 17 00:00:00 2001 From: Julien Delange Date: Mon, 24 Jul 2023 14:59:34 -0400 Subject: [PATCH 2/2] updated comments --- kernel/src/analysis/tree_sitter.rs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/kernel/src/analysis/tree_sitter.rs b/kernel/src/analysis/tree_sitter.rs index 030d82ea..9ec0b6cd 100644 --- a/kernel/src/analysis/tree_sitter.rs +++ b/kernel/src/analysis/tree_sitter.rs @@ -102,14 +102,13 @@ pub fn get_query_nodes( // map a node from the tree-sitter representation into our own internal representation // this is the representation that is passed to the JavaScript layer and how we represent // or expose the node to the end-user. -// -// If this is NOT a named node, we do not return anything. pub fn map_node(node: tree_sitter::Node) -> Option { fn map_node_internal( cursor: &mut tree_sitter::TreeCursor, only_named_node: bool, ) -> Option { - // we do not map space, parenthesis and other non-named nodes. + // we do not map spaces, parentheses and other non-named nodes + // when `only_named_node` is true (which is the case for children only). if only_named_node && !cursor.node().is_named() { return None; } @@ -150,7 +149,7 @@ pub fn map_node(node: tree_sitter::Node) -> Option { let mut ts_cursor = node.walk(); - // Initially, we do not capture only named node to allow capturing unnamed node from + // Initially, we capture both named and unnamed nodes to allow capturing unnamed nodes from // the tree-sitter query. map_node_internal(&mut ts_cursor, false) }