Skip to content

Commit 2fbe020

Browse files
committed
Add config support and parse imports
1 parent b198137 commit 2fbe020

File tree

10 files changed

+220
-33
lines changed

10 files changed

+220
-33
lines changed

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ phf = { version = "0.11.1", features = ["macros"] }
99
serde = { version = "1.0.152", features = ["derive"] }
1010
serde_json = "1.0.114"
1111
tree-sitter = "0.20.9"
12+
toml = "0.8.11"
1213

1314
[build-dependencies]
1415
cc="*"

asterisk.toml

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
[languages]
2+
[languages.python]
3+
[languages.python.matchers]
4+
import_statement = "import_from_statement"
5+
[languages.python.matchers.module_name]
6+
field_name = "module_name"
7+
kind = "dotted_name"
8+
9+
[languages.python.matchers.object_name]
10+
field_name = "name"
11+
kind = "dotted_name"
12+
13+
[languages.python.matchers.alias]
14+
field_name = "alias"
15+
kind = "identifier"
16+
17+
[languages.rust]
18+
[languages.rust.matchers]
19+
import_statement = "use_declaration"
20+
[languages.rust.matchers.module_name]
21+
field_name = "path"
22+
kind = "identifier"
23+
24+
[languages.rust.matchers.object_name]
25+
field_name = "name"
26+
kind = "identifier"
27+
28+
[languages.rust.matchers.alias]
29+
field_name = "alias"
30+
kind = "identifier"

examples/cli.rs

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,12 @@
1+
12
use asterisk::indexer::index_directory;
23
use serde::Serialize;
34
use serde_json::json;
45
use std::env;
56
use std::fs::File;
67
use std::io::Write;
8+
use std::fs;
9+
use asterisk::config::Config;
710

811
#[derive(Serialize)]
912
struct Output {
@@ -12,9 +15,12 @@ struct Output {
1215
}
1316

1417
fn main() {
18+
let toml_str = fs::read_to_string("asterisk.toml").expect("Unable to read file");
19+
let config = Config::from_toml(&toml_str).unwrap();
20+
1521
let args: Vec<String> = env::args().collect();
1622
let dir_path = &args[1];
17-
let (blocks, call_stack, call_graph) = index_directory(dir_path);
23+
let (blocks, call_stack, call_graph) = index_directory(&config, dir_path);
1824

1925
let output = Output { blocks, call_stack };
2026

src/config.rs

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
use serde::Deserialize;
2+
use std::collections::HashMap;
3+
4+
#[derive(Deserialize, Debug)]
5+
pub struct Config {
6+
pub languages: HashMap<String, Language>,
7+
}
8+
9+
#[derive(Deserialize, Debug)]
10+
pub struct Language {
11+
pub matchers: Matchers,
12+
}
13+
14+
#[derive(Deserialize, Debug)]
15+
pub struct Matchers {
16+
pub import_statement: String,
17+
pub module_name: Matcher,
18+
pub object_name: Matcher,
19+
pub alias: Matcher,
20+
}
21+
22+
#[derive(Deserialize, Debug)]
23+
pub struct Matcher {
24+
pub field_name: String,
25+
pub kind: String,
26+
}
27+
28+
29+
impl Config {
30+
pub fn from_toml(toml_str: &str) -> Result<Self, toml::de::Error> {
31+
let configs: Config = toml::from_str(toml_str).expect("Failed to parse TOML");
32+
33+
Ok(configs)
34+
}
35+
}

src/indexer.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
use jwalk::WalkDir;
22
use std::path::Path;
3+
use crate::config::Config;
34

45
use crate::block::{Block, BlockType};
56
use crate::call_graph::CallGraph;
@@ -30,7 +31,7 @@ pub fn generate_node_key(
3031
key
3132
}
3233

33-
pub fn index_directory(dir_path: &str) -> (Vec<Block>, CallStack, CallGraph) {
34+
pub fn index_directory(config: &Config, dir_path: &str) -> (Vec<Block>, CallStack, CallGraph) {
3435
let mut blocks = Vec::new();
3536
let mut call_stack = CallStack::new();
3637

@@ -40,7 +41,7 @@ pub fn index_directory(dir_path: &str) -> (Vec<Block>, CallStack, CallGraph) {
4041

4142
if path.is_file() && is_supported_file(&path) {
4243
let module_name = path.to_str().unwrap();
43-
let file_blocks = parse_file(&path, module_name);
44+
let file_blocks = parse_file(&path, module_name, &config);
4445
blocks.extend(file_blocks.clone());
4546

4647
for block in &file_blocks {

src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,4 @@ pub mod call_stack;
44
pub mod indexer;
55
pub mod parser;
66
pub mod utils;
7+
pub mod config;

src/parser.rs

Lines changed: 139 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
use crate::block::{Block, BlockType};
2+
use crate::config::{Config, Matchers};
3+
use jwalk::rayon::str::ParallelString;
24
use std::collections::{HashMap, HashSet};
35
use std::fs;
46
use std::path::Path;
@@ -12,7 +14,7 @@ extern "C" {
1214
// Add more language bindings here
1315
}
1416

15-
pub fn parse_file(file_path: &Path, module_name: &str) -> Vec<Block> {
17+
pub fn parse_file(file_path: &Path, module_name: &str, config: &Config) -> Vec<Block> {
1618
let code = fs::read_to_string(file_path).unwrap();
1719
let language = tree_sitter_language(file_path);
1820
let mut parser = Parser::new();
@@ -33,6 +35,7 @@ pub fn parse_file(file_path: &Path, module_name: &str) -> Vec<Block> {
3335
None,
3436
module_name,
3537
&mut imports,
38+
&config,
3639
);
3740

3841
if !non_function_blocks.is_empty() {
@@ -71,12 +74,14 @@ fn traverse_tree(
7174
class_name: Option<String>,
7275
module_name: &str,
7376
imports: &mut HashMap<String, String>,
77+
config: &Config,
7478
) {
7579
let node = cursor.node();
7680
let kind = node.kind();
7781

7882
if is_import_statement(kind, language) {
79-
if let Some((module, alias)) = parse_import_statement(code, node, language) {
83+
if let Some((module, alias)) = parse_import_statement(code, node, language, config) {
84+
println!("Module: {}, Alias: {}", module, alias);
8085
imports.insert(alias, module);
8186
}
8287
} else if is_class_definition(kind, language) {
@@ -98,6 +103,7 @@ fn traverse_tree(
98103
Some(extracted_class_name.clone()),
99104
module_name,
100105
imports,
106+
config,
101107
);
102108
if !cursor.goto_next_sibling() {
103109
break;
@@ -145,6 +151,7 @@ fn traverse_tree(
145151
class_name.clone(),
146152
module_name,
147153
imports,
154+
&config,
148155
);
149156
if !cursor.goto_next_sibling() {
150157
break;
@@ -225,36 +232,144 @@ fn is_import_statement(kind: &str, language: Language) -> bool {
225232
lang if lang == unsafe { tree_sitter_python() } => {
226233
kind == "import_statement" || kind == "import_from_statement"
227234
}
235+
lang if lang == unsafe { tree_sitter_rust() } => kind == "use_declaration",
228236
// Add more language-specific checks here
229237
_ => false,
230238
}
231239
}
232240

233-
fn parse_import_statement(code: &str, node: Node, language: Language) -> Option<(String, String)> {
241+
fn filter_import_matchers(
242+
child: Node,
243+
code: &str,
244+
matchers: &Matchers,
245+
) -> (Option<String>, Option<String>, Option<String>) {
246+
let module = child
247+
.child_by_field_name(&matchers.module_name.field_name)
248+
.map(|n| {
249+
if n.kind() == matchers.module_name.kind {
250+
return n.utf8_text(code.as_bytes()).unwrap_or_default().to_owned();
251+
}
252+
253+
String::default()
254+
});
255+
256+
let name = child
257+
.child_by_field_name(&matchers.object_name.field_name)
258+
.map(|n| {
259+
if n.kind() == matchers.object_name.kind {
260+
return n.utf8_text(code.as_bytes()).unwrap_or_default().to_owned();
261+
}
262+
263+
String::default()
264+
});
265+
266+
let alias = child
267+
.child_by_field_name(&matchers.alias.field_name)
268+
.map(|n| {
269+
if n.kind() == matchers.alias.kind {
270+
return n.utf8_text(code.as_bytes()).unwrap_or_default().to_owned();
271+
}
272+
273+
String::default()
274+
});
275+
276+
(module, name, alias)
277+
}
278+
279+
fn parse_import_statement(
280+
code: &str,
281+
node: Node,
282+
language: Language,
283+
config: &Config,
284+
) -> Option<(String, String)> {
285+
let mut module_name = String::new();
286+
let mut object_name = String::new();
287+
let mut alias_name = String::new();
288+
234289
match language {
235290
lang if lang == unsafe { tree_sitter_python() } => {
236-
if node.kind() == "import_from_statement" {
237-
// Extract the module name from the 'module_name' field
238-
let module = node
239-
.child_by_field_name("module_name")
240-
.map(|n| n.utf8_text(code.as_bytes()).ok())
241-
.flatten()
242-
.unwrap_or_default()
243-
.to_string();
244-
245-
// Iterate over the names imported from the module
246-
let node_walk = &mut node.walk();
247-
let imported_names = node.children_by_field_name("name", node_walk);
248-
for imported_name in imported_names {
249-
let alias = imported_name
250-
.utf8_text(code.as_bytes())
251-
.unwrap_or_default()
252-
.to_string();
253-
254-
// In this case, we assume that each import statement imports a single name,
255-
// so we return the first found. For handling multiple imports, this approach needs to be adjusted.
256-
return Some((module.clone(), alias));
291+
let matchers = &config
292+
.languages
293+
.get("python")
294+
.expect("Failed to get Python matchers from config")
295+
.matchers;
296+
297+
if node.kind() == matchers.import_statement {
298+
let result = filter_import_matchers(node, code, matchers);
299+
(module_name, object_name, alias_name) = (
300+
result.0.unwrap_or(module_name),
301+
result.1.unwrap_or(object_name),
302+
result.2.unwrap_or(alias_name),
303+
);
304+
305+
let mut cursor = node.walk();
306+
for child in node.named_children(&mut cursor) {
307+
let result = filter_import_matchers(child, code, matchers);
308+
(module_name, object_name, alias_name) = (
309+
result.0.unwrap_or(module_name),
310+
result.1.unwrap_or(object_name),
311+
result.2.unwrap_or(alias_name),
312+
);
313+
314+
let mut cursor2 = child.walk();
315+
for child2 in child.named_children(&mut cursor2) {
316+
let result = filter_import_matchers(child2, code, matchers);
317+
(module_name, object_name, alias_name) = (
318+
result.0.unwrap_or(module_name),
319+
result.1.unwrap_or(object_name),
320+
result.2.unwrap_or(alias_name),
321+
);
322+
}
323+
}
324+
325+
println!(
326+
"Module: {}, Object: {}, Alias: {}",
327+
module_name, object_name, alias_name
328+
);
329+
return Some((module_name, object_name));
330+
}
331+
None
332+
},
333+
lang if lang == unsafe { tree_sitter_rust() } => {
334+
let matchers = &config
335+
.languages
336+
.get("rust")
337+
.expect("Failed to get Python matchers from config")
338+
.matchers;
339+
340+
if node.kind() == matchers.import_statement {
341+
let result = filter_import_matchers(node, code, matchers);
342+
(module_name, object_name, alias_name) = (
343+
result.0.unwrap_or(module_name),
344+
result.1.unwrap_or(object_name),
345+
result.2.unwrap_or(alias_name),
346+
);
347+
348+
let mut cursor = node.walk();
349+
for child in node.named_children(&mut cursor) {
350+
let result = filter_import_matchers(child, code, matchers);
351+
(module_name, object_name, alias_name) = (
352+
result.0.unwrap_or(module_name),
353+
result.1.unwrap_or(object_name),
354+
result.2.unwrap_or(alias_name),
355+
);
356+
357+
let mut cursor2 = child.walk();
358+
for child2 in child.named_children(&mut cursor2) {
359+
let result = filter_import_matchers(child2, code, matchers);
360+
(module_name, object_name, alias_name) = (
361+
result.0.unwrap_or(module_name),
362+
result.1.unwrap_or(object_name),
363+
result.2.unwrap_or(alias_name),
364+
);
365+
}
257366
}
367+
368+
println!(
369+
"Module: {}, Object: {}, Alias: {}",
370+
module_name, object_name, alias_name
371+
);
372+
return Some((module_name, object_name));
258373
}
259374
None
260375
}

test-code-base-python/main.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from calc1 import Calc as FakeCalc
2+
from a import b
23

34
def main():
45
c = FakeCalc()

test-code-base-python/main2.py

Lines changed: 0 additions & 5 deletions
This file was deleted.

test-code-base-rust/src/main.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
mod person;
22
mod animal;
33

4+
use person::Person as Animal;
5+
46
fn main() {
5-
let mut person = person::Person::new("Alice".to_string(), 30);
7+
let mut person = Person::new("Alice".to_string(), 30);
68
person.introduce();
79
person.celebrate_birthday();
810

0 commit comments

Comments
 (0)