diff --git a/Dockerfile b/Dockerfile index bea22e2..c4e3ff0 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,7 +4,7 @@ FROM ubuntu:latest # # Install dependencies required by the application RUN \ apt-get update && \ - apt-get install ca-certificates git -y && \ + apt-get install ca-certificates git ripgrep -y && \ apt-get clean ADD http://archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2_amd64.deb /tmp RUN chmod a+x /tmp/libssl1.1_1.1.1f-1ubuntu2_amd64.deb && \ @@ -45,7 +45,7 @@ ENV PROVIDER=$PROVIDER COPY ./vibi-dpu/target/debug/vibi-dpu /app/vibi-dpu COPY ./pubsub-sa.json /app/pubsub-sa.json COPY ./repo-profiler.pem /app/repo-profiler.pem - +COPY ./prompts /app/prompts # Create directory for configuration RUN mkdir /app/config diff --git a/vibi-dpu/Cargo.toml b/vibi-dpu/Cargo.toml index bc3ff0f..9b4ade5 100644 --- a/vibi-dpu/Cargo.toml +++ b/vibi-dpu/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "vibi-dpu" -version = "1.0.0" +version = "2.0.0" edition = "2021" authors = ["Tapish Rathore "] license = "GPL-3.0-or-later" @@ -21,7 +21,7 @@ serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" base64ct = "1.5.3" sha256 = "1.1.1" -reqwest = { version = "0.11", features = ["json", "blocking"] } +reqwest = { version = "0.11", features = ["json", "blocking", "stream"] } google-cloud-pubsub = "0.15.0" google-cloud-default = { version = "0.3.0", features = ["pubsub"] } google-cloud-googleapis = "0.9.0" @@ -37,5 +37,5 @@ once_cell = "1.18.0" # MIT jsonwebtoken = "8.3.0" # MIT fern = "0.6.2" # MIT log = "0.4.20" # MIT/Apache2 - +walkdir = "2.5.0" # Unlicence/MIT # todo - check all lib licences diff --git a/vibi-dpu/src/core/diff_graph.rs b/vibi-dpu/src/core/diff_graph.rs new file mode 100644 index 0000000..0aafc50 --- /dev/null +++ b/vibi-dpu/src/core/diff_graph.rs @@ -0,0 +1,46 @@ +use crate::graph::mermaid_elements::generate_mermaid_flowchart; +use crate::utils::user::ProviderEnum; +use crate::utils::review::Review; +use 
crate::core::github; +use crate::utils::gitops::StatItem; + +pub async fn send_diff_graph(review: &Review, excluded_files: &Vec, small_files: &Vec, access_token: &str) { + let comment = diff_graph_comment_text(excluded_files, small_files, review).await; + // add comment for GitHub + if review.provider().to_string() == ProviderEnum::Github.to_string() { + log::info!("Inserting comment on repo {}...", review.repo_name()); + github::comment::add_comment(&comment, review, &access_token).await; + } + + // TODO: add comment for Bitbucket +} + +async fn diff_graph_comment_text(excluded_files: &Vec, small_files: &Vec, review: &Review) -> String { + let mut comment = "Diff Graph:\n\n".to_string(); + + let all_diff_files: Vec = excluded_files + .iter() + .chain(small_files.iter()) + .cloned() // Clone the StatItem instances since `iter` returns references + .collect(); // Collect into a new vector + if let Some(mermaid_text) = mermaid_comment(&all_diff_files, review).await { + comment += mermaid_text.as_str(); + } + comment += "\nTo modify DiffGraph settings, go to [your Vibinex settings page.](https://vibinex.com/settings)\n"; + return comment; +} + +async fn mermaid_comment(diff_files: &Vec, review: &Review) -> Option { + let flowchart_str_opt = generate_mermaid_flowchart(diff_files, review).await; + if flowchart_str_opt.is_none() { + log::error!("[mermaid_comment] Unable to generate flowchart for review: {}", review.id()); + return None; + } + let flowchart_str = flowchart_str_opt.expect("Empty flowchart_str_opt"); + let mermaid_comment = format!( + "### Call Stack Diff\n```mermaid\n{}\n```", + flowchart_str, + ); + return Some(mermaid_comment); +} + diff --git a/vibi-dpu/src/core/mod.rs b/vibi-dpu/src/core/mod.rs index a28ba3e..5c685fb 100644 --- a/vibi-dpu/src/core/mod.rs +++ b/vibi-dpu/src/core/mod.rs @@ -4,4 +4,5 @@ pub mod utils; pub mod approval; pub mod bitbucket; pub mod github; -pub mod trigger; \ No newline at end of file +pub mod trigger; +pub mod diff_graph; \ 
No newline at end of file diff --git a/vibi-dpu/src/core/relevance.rs b/vibi-dpu/src/core/relevance.rs index d939e0e..97807ea 100644 --- a/vibi-dpu/src/core/relevance.rs +++ b/vibi-dpu/src/core/relevance.rs @@ -1,10 +1,10 @@ use std::collections::{HashMap, HashSet}; -use crate::{bitbucket::{self, user::author_from_commit}, core::github, db::review::save_review_to_db, utils::{aliases::get_login_handles, relevance::Relevance, hunk::{HunkMap, PrHunkItem}, user::ProviderEnum}}; +use crate::{bitbucket::{self, user::author_from_commit}, core::github, db::review::save_review_to_db, utils::{aliases::get_login_handles, gitops::StatItem, hunk::{HunkMap, PrHunkItem}, relevance::Relevance, user::ProviderEnum}}; use crate::utils::review::Review; use crate::utils::repo_config::RepoConfig; -pub async fn process_relevance(hunkmap: &HunkMap, review: &Review, +pub async fn process_relevance(hunkmap: &HunkMap, excluded_files: &Vec, review: &Review, repo_config: &mut RepoConfig, access_token: &str, old_review_opt: &Option, ) { log::info!("Processing relevance of code authors..."); @@ -22,7 +22,7 @@ pub async fn process_relevance(hunkmap: &HunkMap, review: &Review, let relevance_vec = relevance_vec_opt.expect("Empty coverage_obj_opt"); if repo_config.comment() { // create comment text - let comment = comment_text(&relevance_vec, repo_config.auto_assign()); + let comment = relevant_reviewers_comment_text(&relevance_vec, repo_config.auto_assign(), excluded_files).await; // add comment if review.provider().to_string() == ProviderEnum::Bitbucket.to_string() { // TODO - add feature flag check @@ -184,7 +184,8 @@ async fn calculate_relevance(prhunk: &PrHunkItem, review: &mut Review) -> Option return Some(relevance_vec); } -fn comment_text(relevance_vec: &Vec, auto_assign: bool) -> String { +async fn relevant_reviewers_comment_text(relevance_vec: &Vec, auto_assign: bool, + excluded_files: &Vec) -> String { let mut comment = "Relevant users for this PR:\n\n".to_string(); // Added two newlines 
comment += "| Contributor Name/Alias | Relevance |\n"; // Added a newline at the end + comment += "| -------------- | --------------- |\n"; // Added a newline at the end @@ -208,6 +209,14 @@ fn comment_text(relevance_vec: &Vec, auto_assign: bool) -> String { comment += &format!("Missing profile handles for {} aliases. [Go to your Vibinex settings page](https://vibinex.com/settings) to map aliases to profile handles.", unmapped_aliases.len()); } + if !excluded_files.is_empty() { + comment += "\n\n"; + comment += "Ignoring the following files due to large size:\n"; + for file_item in excluded_files { + comment += &format!("- {}\n", file_item.filepath.as_str()); + } + } + if auto_assign { + comment += "\n\n"; + comment += "Auto assigning to relevant reviewers."; @@ -216,7 +225,6 @@ fn comment_text(relevance_vec: &Vec, auto_assign: bool) -> String { comment += "If you are a relevant reviewer, you can use the [Vibinex browser extension](https://chromewebstore.google.com/detail/vibinex-code-review/jafgelpkkkopeaefadkdjcmnicgpcncc) to see parts of the PR relevant to you\n"; // Added a newline at the end comment += "Relevance of the reviewer is calculated based on the git blame information of the PR. 
To know more, hit us up at contact@vibinex.com.\n\n"; // Added two newlines comment += "To change comment and auto-assign settings, go to [your Vibinex settings page.](https://vibinex.com/u)\n"; // Added a newline at the end - return comment; } diff --git a/vibi-dpu/src/core/review.rs b/vibi-dpu/src/core/review.rs index 2622d91..7e3f273 100644 --- a/vibi-dpu/src/core/review.rs +++ b/vibi-dpu/src/core/review.rs @@ -1,9 +1,9 @@ -use std::env; +use std::{env, thread, time::Duration}; use serde_json::Value; use crate::{ - core::{relevance::process_relevance, utils::get_access_token}, + core::{relevance::process_relevance, diff_graph::send_diff_graph, utils::get_access_token}, db::{ hunk::{get_hunk_from_db, store_hunkmap_to_db}, repo::get_clone_url_clone_dir, @@ -11,7 +11,7 @@ use crate::{ review::{get_review_from_db, save_review_to_db}, }, utils::{ - gitops::{commit_exists, generate_blame, generate_diff, get_excluded_files, git_pull, process_diffmap}, + gitops::{commit_exists, generate_blame, generate_diff, get_excluded_files, git_pull, process_diffmap, StatItem}, hunk::{HunkMap, PrHunkItem}, repo_config::RepoConfig, reqwest_client::get_client, @@ -41,11 +41,24 @@ pub async fn process_review(message_data: &Vec) { } let access_token = access_token_opt.expect("Empty access_token_opt"); commit_check(&review, &access_token).await; - let hunkmap_opt = process_review_changes(&review).await; - send_hunkmap(&hunkmap_opt, &review, &repo_config, &access_token, &old_review_opt).await; + process_review_changes(&review, &repo_config, &access_token, &old_review_opt).await; } -pub async fn send_hunkmap(hunkmap_opt: &Option, review: &Review, +pub async fn process_review_changes(review: &Review, repo_config: &RepoConfig, access_token: &str, old_review_opt: &Option) { + log::info!("Processing changes in code..."); + if let Some((excluded_files, smallfiles)) = get_included_and_excluded_files(review) { + let hunkmap_opt = calculate_hunkmap(review, &smallfiles).await; + 
send_hunkmap(&hunkmap_opt, &excluded_files, review, repo_config, access_token, old_review_opt).await; + + if repo_config.diff_graph() { + send_diff_graph(review, &excluded_files, &smallfiles, access_token).await; + } + } else { + log::error!("Failed to get included and excluded files"); + } +} + +pub async fn send_hunkmap(hunkmap_opt: &Option, excluded_files: &Vec, review: &Review, repo_config: &RepoConfig, access_token: &str, old_review_opt: &Option) { if hunkmap_opt.is_none() { log::error!("[send_hunkmap] Empty hunkmap in send_hunkmap"); @@ -58,7 +71,7 @@ pub async fn send_hunkmap(hunkmap_opt: &Option, review: &Review, let hunkmap_async = hunkmap.clone(); let review_async = review.clone(); let mut repo_config_clone = repo_config.clone(); - process_relevance(&hunkmap_async, &review_async, + process_relevance(&hunkmap_async, &excluded_files, &review_async, &mut repo_config_clone, access_token, old_review_opt).await; } @@ -73,16 +86,20 @@ fn hunk_already_exists(review: &Review) -> bool { log::debug!("[hunk_already_exists] Hunk already in db!"); return true; } -pub async fn process_review_changes(review: &Review) -> Option{ - log::info!("Processing changes in code..."); - let mut prvec = Vec::::new(); + +fn get_included_and_excluded_files(review: &Review) -> Option<(Vec, Vec)> { let fileopt = get_excluded_files(&review); log::debug!("[process_review_changes] fileopt = {:?}", &fileopt); if fileopt.is_none() { log::error!("[process_review_changes] No files to review for PR {}", review.id()); return None; } - let (_, smallfiles) = fileopt.expect("fileopt is empty"); + let (excluded_files, smallfiles) = fileopt.expect("fileopt is empty"); + return Some(( excluded_files, smallfiles)); +} + +async fn calculate_hunkmap(review: &Review, smallfiles: &Vec) -> Option { + let mut prvec = Vec::::new(); let diffmap = generate_diff(&review, &smallfiles); log::debug!("[process_review_changes] diffmap = {:?}", &diffmap); let linemap = process_diffmap(&diffmap); @@ -109,6 +126,7 @@ 
pub async fn commit_check(review: &Review, access_token: &str) { if !commit_exists(&review.base_head_commit(), &review.clone_dir()) || !commit_exists(&review.pr_head_commit(), &review.clone_dir()) { log::info!("Executing git pull on repo {}...", &review.repo_name()); + tokio::time::sleep(Duration::from_secs(1)).await; git_pull(review, access_token).await; } } @@ -213,7 +231,7 @@ fn create_and_save_github_review_object(deserialized_data: &Value) -> Option) { // commit_check commit_check(&review, &access_token).await; // process_review_changes - let hunkmap_opt = process_review_changes(&review).await; - // send_hunkmap - send_hunkmap(&hunkmap_opt, &review, &repo_config, &access_token, &None).await; + process_review_changes(&review, &repo_config, &access_token, &None).await; } fn parse_message_fields(msg: &Value) -> Option { diff --git a/vibi-dpu/src/db/review.rs b/vibi-dpu/src/db/review.rs index 1d8ce0c..a1d2909 100644 --- a/vibi-dpu/src/db/review.rs +++ b/vibi-dpu/src/db/review.rs @@ -38,7 +38,7 @@ pub fn get_review_from_db(repo_name: &str, repo_owner: &str, let review_res = serde_json::from_slice(&ivec); if let Err(e) = review_res { log::error!( - "[get_handles_from_db] Failed to deserialize review from json: {:?}", + "[get_review_from_db] Failed to deserialize review from json: {:?}", e ); return None; diff --git a/vibi-dpu/src/graph/elements.rs b/vibi-dpu/src/graph/elements.rs new file mode 100644 index 0000000..9327904 --- /dev/null +++ b/vibi-dpu/src/graph/elements.rs @@ -0,0 +1,501 @@ +use std:: + collections::HashMap +; +use serde::{Serialize, Deserialize}; +// TODO, FIXME - remove all unwraps + +use crate::utils::review::Review; + +use super::utils::generate_random_string; + +#[derive(Debug, Default, Clone)] +pub struct MermaidSubgraph { + name: String, + nodes: HashMap, + mermaid_id: String, + color: String +} + +impl MermaidSubgraph { + // Constructor + pub fn new(name: String) -> Self { + let mermaid_id = generate_random_string(4); + Self { + name, + nodes: 
HashMap::new(), + mermaid_id, + color: "".to_string() + } + } + + // Getter for nodes + pub fn nodes(&self) -> &HashMap { + &self.nodes + } + + pub fn mermaid_id(&self) -> &String { + &self.mermaid_id + } + + pub fn set_color(&mut self, color: &str) { + self.color = color.to_string(); + } + + pub fn name(&self) -> &String { + &self.name + } + + // Setter for nodes + pub fn set_nodes(&mut self, nodes: HashMap) { + self.nodes = nodes; + } + + pub fn add_node(&mut self, node: MermaidNode) { + if self.nodes.contains_key(node.function_name()) { + log::error!( + "[add_node] Node already exists: old - {:#?}, new - {:#?}", + &self.nodes[node.function_name()], + node + ); + return; + } + self.nodes.insert(node.function_name().to_string(), node.to_owned()); + } + + pub fn get_node(&self, func_name: &str) -> Option<&MermaidNode> { + self.nodes.get(func_name) + } + + pub fn get_mut_node(&mut self, func_name: &str) -> Option<&mut MermaidNode> { + self.nodes.get_mut(func_name) + } + + pub fn render_subgraph(&self, review: &Review, subgraph_map: &HashMap) -> String { + let mut all_nodes = Vec::new(); + for (_, node) in self.nodes() { + all_nodes.push(node.render_node(review, subgraph_map)); + } + let mut subg_name: &str = self.name(); + if subg_name == "" { + subg_name = "unidentified_file"; + } + let subgraph_str = format!( + "\tsubgraph {} [\"{}\"]\n{}\n\tend\n{}\n", + self.mermaid_id, + subg_name, + all_nodes.join("\n"), + self.render_subgraph_style() + ); + subgraph_str + } + + fn render_subgraph_style(&self) -> String { + let mut class_str = ""; + for (_, node) in self.nodes() { + match node.color().as_str() { + "yellow" => { + class_str = "modified"; + break; + }, + "red" => { + match class_str { + "green" | "yellow" => { + class_str = "modified"; + break; + }, + "" | "red" | _ => { + class_str = "red"; + } + } + }, + "green" => { + match class_str { + "red" | "yellow" => { + class_str = "modified"; + break; + }, + "" | "green" | _ => { + class_str = "green"; + } + } + } + 
"" | _ => () + } + } + if class_str != "" { + return format!("\tclass {} {}", self.mermaid_id(), class_str); + } + return "".to_string(); + } +} + +#[derive(Debug, Serialize, Default, Deserialize, Clone)] +pub struct MermaidNode { + function_name: String, + mermaid_id: String, + parent_id: String, + color: String, + def_line: usize +} + +impl MermaidNode { + // Constructor + pub fn new(function_name: String, parent_id: String, def_line: usize) -> Self { + let mermaid_id = generate_random_string(4); + Self { + mermaid_id, + function_name, + parent_id, + color: "".to_string(), + def_line + } + } + + pub fn color(&self) -> &String { + &self.color + } + + pub fn function_name(&self) -> &String { + &self.function_name + } + + // Getter for mermaid_id + pub fn mermaid_id(&self) -> &String { + &self.mermaid_id + } + + pub fn parent_id(&self) -> &String { + &self.parent_id + } + + pub fn set_color(&mut self, color: &str) { + self.color = color.to_string() + } + + pub fn compare_and_change_color(&mut self, node_color: &str) { + if (self.color() == "red" && node_color == "green") || + (self.color() == "green" && node_color == "red") { + self.set_color("yellow"); + } + } + + pub fn render_node(&self, review: &Review, subgraph_map: &HashMap) -> String { + let url_str = format!("\tclick {} href \"{}\" _blank", + self.mermaid_id(), self.get_node_str(review, subgraph_map)); + let class_str = self.get_style_class(); + let mut func_name: &str = self.function_name(); + if func_name == "" { + func_name = "unidentified_func"; + } + let node_str = format!("\t{}[\"{}\"]", &self.mermaid_id, func_name); + return format!("{}\n{}\n{}", &node_str, &class_str, &url_str); + } + + fn get_node_str(&self, review: &Review, subgraph_map: &HashMap) -> String { + if let Some(subgraph) = subgraph_map.get(self.parent_id()) { + let file_hash = sha256::digest(subgraph.name()); + return match self.color.as_str() { + "green" | "yellow" => { + let diff_side_str = "R"; + format!( + 
"https://github.com/{}/{}/pull/{}/files#diff-{}{}{}", + review.repo_owner(), + review.repo_name(), + review.id(), + &file_hash, + diff_side_str, + self.def_line + ) + } + "red" => { + let diff_side_str = "L"; + format!( + "https://github.com/{}/{}/pull/{}/files#diff-{}{}{}", + review.repo_owner(), + review.repo_name(), + review.id(), + &file_hash, + diff_side_str, + self.def_line + ) + } + "" | _ => format!( + "https://github.com/{}/{}/blob/{}/{}#L{}", + review.repo_owner(), + review.repo_name(), + review.base_head_commit(), + subgraph.name(), + self.def_line + ), + }; + } + return "".to_string(); + } + + fn get_style_class(&self) -> String { + let class_str_prefix = format!("class {}", self.mermaid_id()); + match self.color.as_str() { + "green" => format!("\t{} added", &class_str_prefix), + "red" => format!("\t{} deleted", &class_str_prefix), + "yellow" => format!("\t{} modified", &class_str_prefix), + "" | _ => "".to_string() + } + } +} + +#[derive(Debug, Default, Clone)] +pub struct MermaidEdge { + line: usize, + src_func_key: String, + src_subgraph_key: String, + dest_func_key: String, + dest_subgraph_key: String, + color: String +} + +impl MermaidEdge { + // Constructor + pub fn new( + line: usize, + src_func_key: String, + src_subgraph_key: String, + dest_func_key: String, + dest_subgraph_key: String, + color: String, + ) -> Self { + Self { + line, + src_func_key, + src_subgraph_key, + dest_func_key, + dest_subgraph_key, + color, + } + } + + // Getter for line + pub fn line(&self) -> usize { + self.line + } + + // Getter for color + pub fn color(&self) -> &String { + &self.color + } + + // Getter for src_func_key + pub fn src_func_key(&self) -> &String { + &self.src_func_key + } + + // Getter for src_subgraph_key + pub fn src_subgraph_key(&self) -> &String { + &self.src_subgraph_key + } + + // Getter for dest_func_key + pub fn dest_func_key(&self) -> &String { + &self.dest_func_key + } + + // Getter for dest_subgraph_key + pub fn dest_subgraph_key(&self) -> 
&String { + &self.dest_subgraph_key + } + + // Setter for color + pub fn set_color(&mut self, color: &str) { + self.color = color.to_string(); + } + + pub fn compare_and_set_color(&mut self, edge_color: &str) { + if (self.color() == "green" && edge_color == "red") || + (self.color() == "red" && edge_color == "green") { + self.set_color("yellow"); + } + } + + pub fn add_edge_and_nodes(&mut self) { + // add edge and source and destination nodes + } + + pub fn get_edge_key(&self) -> String { + let edge_key = format!( + "{}/{}/{}/{}/{}", self.src_subgraph_key(), self.src_func_key(), + self.line(), + self.dest_subgraph_key(), self.dest_func_key() + ); + return edge_key; + } +} + +#[derive(Debug, Default, Clone)] +pub struct MermaidGraphElements { + edges: HashMap, + subgraphs: HashMap, +} + +impl MermaidGraphElements { + pub fn new() -> Self { + Self { + edges: HashMap::new(), + subgraphs: HashMap::new(), + } + } + + pub fn add_edge( + &mut self, + edge_color: &str, + calling_line_num: usize, + source_func_name: &str, + dest_func_name: &str, + source_file: &str, + dest_file: &str, + source_color: &str, + dest_color: &str, + source_def_line: &usize, + dest_def_line: &usize + ) { + self.create_or_modify_node(source_file, source_func_name, source_color, source_def_line); + self.create_or_modify_node(dest_file, dest_func_name, dest_color, dest_def_line); + let edge = MermaidEdge::new( + calling_line_num, + source_func_name.to_string(), + source_file.to_string(), + dest_func_name.to_string(), + dest_file.to_string(), + edge_color.to_string()); + log::debug!("[add_edge] edge = {:#?}", &edge); + self.add_edge_to_edges(edge); + } + + fn create_or_modify_node(&mut self, subgraph_key: &str, node_func_name: &str, node_color: &str, def_line: &usize) { + if let Some(subgraph) = self.subgraphs.get_mut(subgraph_key) { + if let Some(node) = subgraph.get_mut_node(node_func_name) { + node.compare_and_change_color(node_color); + } else { + let mut node = 
MermaidNode::new(node_func_name.to_string(), + subgraph.name().to_string(), def_line.to_owned()); + node.set_color(node_color); + subgraph.add_node(node); + } + } else { + let mut subgraph = MermaidSubgraph::new(subgraph_key.to_string()); + let mut node = MermaidNode::new(node_func_name.to_string(), + subgraph.name().to_string(), def_line.to_owned()); + node.set_color(node_color); + subgraph.add_node(node); + self.add_subgraph(subgraph); + } + } + + fn add_subgraph(&mut self, subgraph: MermaidSubgraph) { + if !self.subgraphs.contains_key(subgraph.name()) { + self.subgraphs.insert(subgraph.name().to_string(), subgraph); + } + } + + fn add_edge_to_edges(&mut self, edge: MermaidEdge) { + let edge_key = edge.get_edge_key(); + if let Some(edge_mut) = self.edges.get_mut(&edge_key) { + edge_mut.compare_and_set_color(edge.color()); + return; + } + self.edges.insert(edge_key, edge); + } + + fn render_subgraphs(&self, review: &Review) -> String { + format!("{}\n{}", + self.subgraphs + .values() + .map(|subgraph| subgraph.render_subgraph(review, &self.subgraphs)) + .collect::>() + .join("\n"), + self.subgraph_style_defs()) + } + + fn subgraph_style_defs(&self) -> String { + let modified_class_def = "\tclassDef modified stroke:black,fill:yellow"; + let added_class_def = "\tclassDef added stroke:black,fill:#b7e892,color:black"; + let deleted_class_def = "\tclassDef deleted stroke:black,fill:red"; + format!("{}\n{}\n{}", modified_class_def, added_class_def, deleted_class_def) + } + + pub fn render_elements(&self, review: &Review) -> String { + let all_elements_str = format!("{}\n{}", &self.render_subgraphs(review), &self.render_edges(review)); + all_elements_str + } + + fn render_edges(&self, review: &Review) -> String { + let mut edge_defs = Vec::::new(); + let mut default_edge_styles = Vec::::new(); + let mut green_edge_styles = Vec::::new(); + let mut red_edge_styles = Vec::::new(); + let mut yellow_edge_styles = Vec::::new(); + for (_, edge) in &self.edges { + let 
src_node_id = self.subgraphs[edge.src_subgraph_key()].nodes()[edge.src_func_key()].mermaid_id(); + let dest_node_id = self.subgraphs[edge.dest_subgraph_key()].nodes()[edge.dest_func_key()].mermaid_id(); + let file_hash = sha256::digest(edge.src_subgraph_key()); + let edge_link_str = match edge.color().as_str() { + "red" => format!("https://github.com/{}/{}/pull/{}/files#diff-{}L{}", + review.repo_owner(), + review.repo_name(), + review.id(), + &file_hash, + edge.line() + ), + "green" | "yellow" => format!("https://github.com/{}/{}/pull/{}/files#diff-{}R{}", + review.repo_owner(), + review.repo_name(), + review.id(), + &file_hash, + edge.line() + ), + "" | _ => format!("https://github.com/{}/{}/blob/{}/{}#L{}", + review.repo_owner(), + review.repo_name(), + review.base_head_commit(), + edge.src_subgraph_key(), + edge.line() + ) + }; + let edge_def_str = format!( + "\t{} ==\"<a href='{}'>Line {}</a>\" =====>{}", + src_node_id, edge_link_str, edge.line(), dest_node_id); + edge_defs.push(edge_def_str); + match edge.color().as_str() { + "red" => red_edge_styles.push((edge_defs.len() - 1).to_string()), + "green" => green_edge_styles.push((edge_defs.len() - 1).to_string()), + "yellow" => yellow_edge_styles.push((edge_defs.len() - 1).to_string()), + "" | _ => default_edge_styles.push((edge_defs.len() - 1).to_string()) + } + } + if !edge_defs.is_empty() { + let default_edges_str = match default_edge_styles.is_empty() { + true => "".to_string(), + false => format!("\tlinkStyle {} stroke-width:1", default_edge_styles.join(",")) + }; + let green_edges_str = match green_edge_styles.is_empty() { + true => "".to_string(), + false => format!("\tlinkStyle {} stroke:green,stroke-width:8", green_edge_styles.join(",")) + }; + let red_edges_str = match red_edge_styles.is_empty() { + true => "".to_string(), + false => format!("\tlinkStyle {} stroke:red,stroke-width:10", red_edge_styles.join(",")) + }; + let yellow_edges_str = match yellow_edge_styles.is_empty() { + true => "".to_string(), + false => 
format!("\tlinkStyle {} stroke:#ffe302,stroke-width:10", yellow_edge_styles.join(",")) + }; + return format!("{}\n{}\n{}\n{}\n{}", + edge_defs.join("\n"), + &default_edges_str, + &green_edges_str, + &red_edges_str, + &yellow_edges_str + ); + } + + return "".to_string(); + } +} diff --git a/vibi-dpu/src/graph/file_imports.rs b/vibi-dpu/src/graph/file_imports.rs new file mode 100644 index 0000000..e92f98e --- /dev/null +++ b/vibi-dpu/src/graph/file_imports.rs @@ -0,0 +1,462 @@ +use std::{collections::HashMap, path::PathBuf}; + +use serde::{Deserialize, Serialize}; +use serde_json::json; + +use crate::{graph::utils::numbered_content, utils::review::Review}; + +use super::utils::{all_code_files, call_llm_api, read_file}; + +// #[derive(Debug, Serialize, Default, Deserialize, Clone)] +// struct LlmImportLineInput { +// language: String, +// file_path: String, +// chunk: String +// } + +// #[derive(Debug, Serialize, Default, Deserialize, Clone)] +// struct LlmImportLineRequest { +// input: LlmImportLineInput +// } + +// #[derive(Debug, Serialize, Default, Deserialize, Clone)] +// pub struct FileImportLines { +// lines: Vec +// } + +// #[derive(Debug, Serialize, Default, Deserialize, Clone)] +// struct LlmImportPathInput { +// language: String, +// file_path: String, +// import_lines: String +// } + +// #[derive(Debug, Serialize, Default, Deserialize, Clone)] +// struct LlmImportPathRequest { +// input: LlmImportPathInput +// } + +// #[derive(Debug, Serialize, Default, Deserialize, Clone)] +// pub struct ImportPath { +// import_line: usize, +// import_path: String, +// imported: String +// } + +// impl PartialEq for ImportPath { +// fn eq(&self, other: &Self) -> bool { +// self.import_line == other.import_line && self.import_path == other.import_path && self.imported == other.imported +// } +// } + +// impl ImportPath { + +// pub fn new(import_line: usize, import_path: String, imported: String) -> Self { +// Self { import_line, import_path, imported } +// } +// pub fn 
import_path(&self) -> &String { +// &self.import_path +// } + +// pub fn imported(&self) -> &String { +// &self.imported +// } +// } + +// #[derive(Debug, Serialize, Default, Deserialize, Clone)] +// pub struct ImportPaths { +// imports: Vec, +// } + +// impl ImportPaths { +// pub fn imports(&self) -> &Vec { +// &self.imports +// } +// } + +// #[derive(Debug, Serialize, Default, Deserialize, Clone)] +// pub struct ChunkImportInfo { +// import_lines: FileImportLines, +// import_paths: Vec +// } + +// impl ChunkImportInfo { +// pub fn import_paths(&self) -> &Vec { +// &self.import_paths +// } +// } + +// #[derive(Debug, Serialize, Default, Deserialize, Clone)] +// pub struct FileImportInfo { +// import_chunk_info: Vec, +// filepath: String +// } + +// impl FileImportInfo { +// pub fn all_import_paths(&self) -> Vec { +// let all_paths: Vec = self.import_chunk_info +// .iter() +// .flat_map(|chunk| chunk.import_paths()) +// .cloned() +// .collect(); +// return all_paths; +// } + +// pub fn filepath(&self) -> &String { +// &self.filepath +// } +// } + +// #[derive(Debug, Serialize, Default, Deserialize, Clone)] +// pub struct FilesImportInfo { +// file_import_map: HashMap +// } + +// impl FilesImportInfo { +// pub fn files(&self) -> Vec<&String> { +// self.file_import_map.keys().collect() +// } + +// pub fn is_import_in_file(&self, filename: &str, import_path: &ImportPath) -> bool { +// self.file_import_map[filename].all_import_paths().contains(import_path) +// } + +// pub fn file_import_info(&self, filename: &str) -> Option<&FileImportInfo> { +// self.file_import_map.get(filename) +// } + +// pub fn file_import_map(&self) -> &HashMap { +// &self.file_import_map +// } +// } + +// pub async fn get_import_lines(file_paths: &Vec) -> Option { +// let mut all_import_info = HashMap::::new(); +// let system_prompt_opt = read_file("/app/prompts/prompt_import_lines"); +// if system_prompt_opt.is_none() { +// log::error!("[get_import_lines] Unable to read prompt_import_lines"); 
+// return None; +// } +// let system_prompt_lines = system_prompt_opt.expect("Empty system_prompt"); +// let system_prompt_path_opt = read_file("/app/prompts/prompt_import_path"); +// if system_prompt_path_opt.is_none() { +// log::error!("[get_import_lines] Unable to read prompt_import_path"); +// return None; +// } +// let system_prompt_path = system_prompt_path_opt.expect("Empty system_prompt"); +// for path in file_paths { +// log::debug!("[get_import_lines] path = {:?}", path); +// let file_contents_res = std::fs::read_to_string(path.clone()); +// if file_contents_res.is_err() { +// let e = file_contents_res.expect_err("Empty error in file_content_res"); +// log::error!("[get_import_lines] Unable to read file: {:?}, error: {:?}", path, e); +// continue; +// } +// let file_contents = file_contents_res.expect("Uncaught error in file_content_res"); +// let numbered_content = numbered_content(file_contents); +// let chunks = numbered_content.chunks(20); +// let path_str = path.to_str().expect("Empty path"); +// let mut chunks_import_vec = Vec::::new(); +// for chunk in chunks { +// let chunk_str = chunk.join("\n"); +// let import_lines_opt = get_import_lines_chunk( +// &system_prompt_lines, &chunk_str, +// path_str).await; +// if import_lines_opt.is_none() { +// log::error!("[get_import_lines] Skipping chunk, unable to get import lines"); +// continue; +// } +// let import_lines_chunk = import_lines_opt.expect("Empty func_boundary_opt"); +// if let Some(import_paths) = get_import_path_file(&numbered_content, +// import_lines_chunk.clone(), &system_prompt_path, path_str).await { +// let chunk_import_info = ChunkImportInfo { import_lines: import_lines_chunk, import_paths }; +// chunks_import_vec.push(chunk_import_info); +// } +// } +// let import_info = FileImportInfo { +// import_chunk_info: chunks_import_vec, filepath: path_str.to_string() }; +// all_import_info.insert(path_str.to_string(), import_info); +// } +// if all_import_info.is_empty() { +// return None; 
+// } +// return Some(FilesImportInfo { file_import_map: all_import_info }); +// } + +// async fn get_import_lines_chunk(system_prompt_lines: &str, chunk_str: &str, file_path: &str) -> Option { +// let llm_req = LlmImportLineRequest { input: +// LlmImportLineInput { +// language: "rust".to_string(), +// file_path: file_path.to_string(), +// chunk: chunk_str.to_string() } }; +// let llm_req_res = serde_json::to_string(&llm_req); +// if llm_req_res.is_err() { +// log::error!("[get_import_lines_chunk] Error in serializing llm req: {}", llm_req_res.expect_err("Empty error in llm_req_res")); +// return None; +// } +// let llm_req_prompt = llm_req_res.expect("Uncaught error in llm_req_res"); +// let prompt = format!("{}\n\n### User Message\nInput -\n{}\nOutput -", +// system_prompt_lines, llm_req_prompt); +// match call_llm_api(prompt).await { +// None => { +// log::error!("[get_import_lines_chunk] Failed to call LLM API"); +// return None; +// } +// Some(llm_response) => { +// let import_res = serde_json::from_str(&llm_response); +// if import_res.is_err() { +// log::error!( +// "[get_import_lines_chunk] funcdefs error: {}", +// import_res.expect_err("Empty error in funcdefs_res")); +// return None; +// } +// let import_lines_file: FileImportLines = import_res.expect("Uncaught error in funcdefs_res"); +// return Some(import_lines_file); +// } +// } +// } + +#[derive(Serialize, Deserialize, Debug)] +struct InputSchema { + function_name: String, + code_chunk: String, + language: String, + file_path: String, +} + +// Output schema structure for matching import +#[derive(Serialize, Deserialize, Debug)] +pub struct MatchingImport { + line_number: u32, + import_statement: String, + possible_file_path: String, +} + +impl MatchingImport { + pub fn possible_file_path(&self) -> &String { + &self.possible_file_path + } +} + +// Full output schema structure +#[derive(Serialize, Deserialize, Debug)] +pub struct ImportPathOutput { + matching_import: MatchingImport, + notes: Option, 
+} + +impl ImportPathOutput { + pub fn get_matching_import(&self) -> &MatchingImport { + &self.matching_import + } +} + +// Instruction structure +#[derive(Serialize, Deserialize, Debug)] +struct Instructions { + input_schema: InputSchemaDescription, + output_schema: OutputSchemaDescription, + task_description: String, +} + +// Description of input schema +#[derive(Serialize, Deserialize, Debug)] +struct InputSchemaDescription { + function_name: String, + code_chunk: String, + language: String, + file_path: String, +} + +// Description of output schema +#[derive(Serialize, Deserialize, Debug)] +struct OutputSchemaDescription { + matching_import: MatchingImportDescription, + notes: String, +} + +// Description for matching import schema +#[derive(Serialize, Deserialize, Debug)] +struct MatchingImportDescription { + line_number: String, + import_statement: String, + possible_file_path: String, +} + +// Complete structure for JSON input and output +#[derive(Serialize, Deserialize, Debug)] +struct ImportPathJsonStructure { + instructions: Instructions, + sample_input: InputSchema, + expected_output: ImportPathOutput, + input: Option, +} + +impl ImportPathJsonStructure { + fn set_input(&mut self, input_schema: InputSchema) { + self.input = Some(input_schema); + } +} + +pub struct ImportIdentifier { + prompt_struct: ImportPathJsonStructure +} + +impl ImportIdentifier { + pub fn new() -> Option { + let system_prompt_opt = read_file("/app/prompts/prompt_import_file_path"); + if system_prompt_opt.is_none() { + log::debug!("[ImportIdentifier/new] Unable to read prompt_import_file"); + return None; + } + let system_prompt_str = system_prompt_opt.expect("Empty system_prompt_opt"); + let sys_prompt_struct_res = serde_json::from_str(&system_prompt_str); + if sys_prompt_struct_res.is_err() { + log::debug!("[ImportIdentifier/new] Unable to deserialize sys prompt: {:?}", + sys_prompt_struct_res.expect_err("Empty error")); + return None; + } + let sys_prompt_struct: 
ImportPathJsonStructure = sys_prompt_struct_res.expect("Uncaught error in sys_prompt_struct_res"); + return Some(Self { + prompt_struct: sys_prompt_struct + }); + } + async fn get_import_path(&mut self, func_name: &str, lang: &str, file_path: &str, chunk: &str) -> Option{ + // create prompt + let input_schema = InputSchema { + function_name: func_name.to_string(), + code_chunk: chunk.to_string(), + language: lang.to_string(), + file_path: file_path.to_string(), + }; + self.prompt_struct.set_input(input_schema); + // call api + let import_struct_str_res = serde_json::to_string(&self.prompt_struct); + if import_struct_str_res.is_err() { + log::debug!( + "[ImportIdentifier/get_import_path] Unable to deserialize prompt struct: {:?}", + import_struct_str_res.expect_err("Empty error in import_struct_str_res")); + return None; + } + let import_struct_str = import_struct_str_res.expect("Uncaught error in import_struct_str_res"); + let prompt_str = format!("{}\nOutput -", &import_struct_str); + let import_path_opt = call_llm_api(prompt_str).await; + // deserialize output + if import_path_opt.is_none() { + log::debug!("[ImportIdentifier/get_import_path] Unable to call llm api"); + return None; + } + let import_path_str = import_path_opt.expect("Empty import_path_opt"); + let import_path_res = serde_json::from_str(&import_path_str); + if import_path_res.is_err() { + log::debug!( + "[ImportIdentifier/get_import_path] Unable to deserialize import path output : {:?}", + import_path_res.expect_err("Empty error in import_path_res")); + return None; + } + let import_path: ImportPathOutput = import_path_res.expect("Unacaught error in import_path_res"); + return Some(import_path); + } + + pub async fn get_import_path_file(&mut self, file_path: &str, lang: &str, func_name: &str) -> Option { + let file_contents_res = std::fs::read_to_string(file_path); + if file_contents_res.is_err() { + let e = file_contents_res.expect_err("Empty error in file_content_res"); + 
log::error!("[get_import_lines] Unable to read file: {:?}, error: {:?}", file_path, e); + return None; + } + let file_contents = file_contents_res.expect("Uncaught error in file_content_res"); + let numbered_content = numbered_content(file_contents); + let chunks = numbered_content.chunks(20); + for chunk in chunks { + let chunk_str = chunk.join("\n"); + let import_path_opt = self.get_import_path(func_name, lang, file_path, &chunk_str).await; + if import_path_opt.is_some() { + return import_path_opt; + } + } + return None; + } +} + +// async fn get_import_path_file(chunk: &Vec, func_name: &str, lang: &str, file_path: &str) -> Option> { + + + +// let mut import_paths = Vec::::new(); +// // get import lines from numbered lines +// let import_lines_str_opt = numbered_import_lines(numbered_content, import_line); +// if import_lines_str_opt.is_none() { +// log::error!("[get_import_path_file] Unable to get numbered import line"); +// return None; +// } +// let import_lines_str_chunks = import_lines_str_opt.expect("Empty import_lines_str_opt"); +// for import_lines_chunk in import_lines_str_chunks { +// let llm_req = LlmImportPathRequest{ +// input: LlmImportPathInput { +// language: "rust".to_string(), +// file_path: file_path.to_string(), +// import_lines: import_lines_chunk +// } +// }; +// let llm_req_res = serde_json::to_string(&llm_req); +// if llm_req_res.is_err() { +// log::error!("[get_import_path_file] Error in serializing llm req: {}", llm_req_res.expect_err("Empty error in llm_req_res")); +// return None; +// } +// let llm_req_prompt = llm_req_res.expect("Uncaught error in llm_req_res"); +// let prompt = format!("{}\n\n### User Message\nInput -\n{}\nOutput -", +// system_prompt, llm_req_prompt); +// match call_llm_api(prompt).await { +// None => { +// log::error!("[get_import_path_file] Failed to call LLM API"); +// return None; +// } +// Some(llm_response) => { +// let import_res = serde_json::from_str(&llm_response); +// if import_res.is_err() { +// 
log::error!( +// "[get_import_path_file] funcdefs error: {}", +// import_res.expect_err("Empty error in funcdefs_res")); +// continue; +// } +// let import_path: ImportPaths = import_res.expect("Uncaught error in funcdefs_res"); +// import_paths.push(import_path); +// } +// } +// } +// if import_paths.is_empty() { +// return None; +// } +// let import_path_vec: Vec = import_paths +// .iter() +// .flat_map(|ip| ip.imports.iter().cloned()) +// .collect(); +// return Some(import_path_vec); +// } + +// fn numbered_import_lines(numbered_content: &Vec, import_line: FileImportLines) -> Option>{ +// let mut chunks = Vec::new(); +// let mut chunk = String::new(); +// let mut line_count = 0; + +// for line in import_line.lines { +// if line_count == 10 { +// chunks.push(chunk.clone()); +// chunk = String::new(); +// line_count = 0; +// } +// chunk += &numbered_content[line as usize]; +// line_count += 1; +// } + +// // Push the last chunk if it's not empty +// if !chunk.is_empty() { +// chunks.push(chunk); +// } + +// if chunks.is_empty() { +// return None; +// } +// Some(chunks) +// } \ No newline at end of file diff --git a/vibi-dpu/src/graph/function_call.rs b/vibi-dpu/src/graph/function_call.rs new file mode 100644 index 0000000..f21fa28 --- /dev/null +++ b/vibi-dpu/src/graph/function_call.rs @@ -0,0 +1,384 @@ +use std::{collections::{HashMap, HashSet}, io::BufReader, path::{Path, PathBuf}, process::{Command, Stdio}}; + +use serde::{Deserialize, Serialize}; +use std::io::BufRead; +use crate::utils::review::Review; + +use super::{gitops::{HunkDiffLines, HunkDiffMap}, utils::{call_llm_api, detect_language, numbered_content, read_file}}; + +#[derive(Debug, Serialize, Default, Deserialize, Clone)] +pub struct FunctionCallChunk { + function_calls: Vec, + #[serde(skip_deserializing)] + function_name: Option +} + +impl FunctionCallChunk { + pub fn new(function_calls: Vec, function_name: String) -> Self { + Self { function_calls, function_name: Some(function_name) } + } + pub fn 
function_calls(&self) -> &Vec { + &self.function_calls + } +} + +#[derive(Debug, Serialize, Default, Deserialize, Clone)] +pub struct FunctionCallInput { + pub language: String, + pub chunk: String, + pub function_name: String +} + +pub async fn function_calls_in_chunk(chunk: &str, func_name: &str) -> Option{ + let system_prompt_opt = read_file("/app/prompts/prompt_function_call"); + if system_prompt_opt.is_none() { + log::error!("[function_calls_in_chunk] Unable to read prompt_function_call"); + return None; + } + let system_prompt_lines = system_prompt_opt.expect("Empty system_prompt"); + let func_call_input = FunctionCallInput{ + language: "rust".to_string(), + chunk: chunk.to_string(), + function_name: func_name.to_string() }; + let func_call_input_res = serde_json::to_string(&func_call_input); + if func_call_input_res.is_err() { + let e = func_call_input_res.expect_err("Empty error in func_call_input_res"); + log::error!("[function_calls_in_chunk] Error serializing func call input: {:?}", e); + return None; + } + let func_call_input_str = func_call_input_res.expect("Uncaught error in func_call_input_res"); + let prompt = format!("{}\n\n### User Message\nInput -\n{}\nOutput -", + system_prompt_lines, &func_call_input_str); + let prompt_response_opt = call_llm_api(prompt).await; + if prompt_response_opt.is_none() { + log::error!("[function_calls_in_chunk] Unable to call llm for chunk: {:?}", chunk); + return None; + } + let prompt_response = prompt_response_opt.expect("Empty prompt_response_opt"); + let deserialized_response = serde_json::from_str(&prompt_response); + if deserialized_response.is_err() { + let e = deserialized_response.expect_err("Empty error in deserialized_response"); + log::error!("[function_calls_in_chunk] Error in deserializing response: {:?}", e); + return None; + } + let func_call_chunk: FunctionCallChunk = deserialized_response.expect("Uncuaght error in deserialized_response"); + if func_call_chunk.function_calls.is_empty() { + 
log::debug!("No function calls in this chunk: {:?}", chunk); + return None; + } + return Some(func_call_chunk); +} + +pub async fn function_calls_in_file(filepath: &PathBuf, func_name: &str) -> Option> { + let mut all_func_calls = Vec::::new(); + let file_contents = std::fs::read_to_string(filepath.clone()).ok()?; + let numbered_content = numbered_content(file_contents); + let chunks = numbered_content.chunks(50); + for chunk in chunks { + let chunk_str = chunk.join("\n"); + let func_call_chunk_opt = function_calls_in_chunk(&chunk_str, func_name).await; + if func_call_chunk_opt.is_none() { + log::debug!("[function_calls_in_file] No function call in chunk for file: {:?}", filepath); + continue; + } + let func_call_chunk = func_call_chunk_opt.expect("Empty func_call_chunk_opt"); + all_func_calls.push(func_call_chunk); + } + if all_func_calls.is_empty() { + log::debug!("[function_calls_in_file] No function call in file: {:?}, {:?}", filepath, func_name); + return None; + } + return Some(all_func_calls); +} + +// pub async fn function_calls_in_hunks(hunk_file_map: &HunkDiffMap) -> Option>>> { +// let mut file_func_call_map: HashMap>> = HashMap::new(); +// for (file, hunk_lines_vec) in hunk_file_map.file_line_map() { +// let file_contents_res = std::fs::read_to_string(file.clone()); +// if file_contents_res.is_err() { +// let e = file_contents_res.expect_err("Empty error in file_contents_res"); +// log::error!("[function_calls_in_hunks] Error in getting file contents: {:?}", e); +// continue; +// } +// let file_contents = file_contents_res.expect("Uncaught error in file_contents_res"); +// let numbered_content = numbered_content(file_contents); +// let file_path = Path::new(file); +// let file_vec = vec![file_path.to_path_buf()]; +// let imports_opt = get_import_lines(&file_vec).await; +// if imports_opt.is_none() { +// log::debug!("[function_calls_in_hunks] No imports in file: {:?}", file); +// continue; +// } +// let file_imports = imports_opt.expect("Empty 
imports_opt"); +// let file_import_info = file_imports.file_import_info(file).expect("Empty file_import_info"); +// let mut func_call_map: HashMap> = HashMap::new(); +// for import_info in file_import_info.all_import_paths() { +// let func_name = import_info.imported(); +// // TODO FIXME - get numbered content for hunk +// for hunk_lines in hunk_lines_vec { +// let mut func_call_vec: Vec = Vec::new(); +// let hunk_chunk_vec = &numbered_content[hunk_lines.start_line().. hunk_lines.end_line()]; +// for hunk_chunk in hunk_chunk_vec.chunks(30) { +// let hunk_str = hunk_chunk.join("\n"); +// if let Some(func_calls) = function_calls_in_chunk(&hunk_str, func_name).await { +// func_call_vec.extend(func_calls.function_calls()); +// } +// } +// if !func_call_vec.is_empty() { +// func_call_map.entry(func_name.to_string()).or_insert_with(Vec::new).extend(func_call_vec); +// } +// // get func name from imports +// // TODO - git checkout before function call + + +// } +// } +// if !func_call_map.is_empty() { +// file_func_call_map.insert(file.to_string(), func_call_map); +// } +// } +// if file_func_call_map.is_empty() { +// return None; +// } +// return Some(file_func_call_map); +// } +#[derive(Serialize, Deserialize, Debug)] +struct InputSchema { + code_chunk: String, + language: String, + file_path: String, +} + +// Structure for function calls in the output schema +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct FunctionCall { + line_number: u32, + function_name: String, +} + +impl FunctionCall { + pub fn function_name(&self) -> &String { + &self.function_name + } + + pub fn line_number(&self) -> &u32 { + &self.line_number + } +} + +// Output schema structure +#[derive(Default, Serialize, Deserialize, Debug, Clone)] +pub struct FunctionCallsOutput { + function_calls: Vec, + notes: Option, +} + +impl FunctionCallsOutput { + pub fn function_calls(&self) -> &Vec { + return &self.function_calls + } +} + +// Instruction structure +#[derive(Serialize, Deserialize, 
Debug)] +struct Instructions { + input_schema: InputSchemaDescription, + output_schema: OutputSchemaDescription, + task_description: String, +} + +// Description of input schema +#[derive(Serialize, Deserialize, Debug)] +struct InputSchemaDescription { + code_chunk: String, + language: String, + file_path: String, +} + +// Description of output schema +#[derive(Serialize, Deserialize, Debug)] +struct OutputSchemaDescription { + function_calls: Vec, + notes: String, +} + +// Description for each function call in output +#[derive(Serialize, Deserialize, Debug)] +struct FunctionCallDescription { + line_number: String, + function_name: String, +} + +// Complete structure for JSON input and output +#[derive(Serialize, Deserialize, Debug)] +struct JsonStructure { + instructions: Instructions, + sample_input: InputSchema, + expected_output: FunctionCallsOutput, + input: Option, +} + +impl JsonStructure { + fn set_input(&mut self, input: InputSchema) { + self.input = Some(input); + } +} + +pub struct FunctionCallIdentifier { + prompt: JsonStructure, + chunk_size: usize +} + +impl FunctionCallIdentifier { + pub fn new() -> Option { + let system_prompt_opt = read_file("/app/prompts/prompt_function_calls"); + if system_prompt_opt.is_none() { + log::error!("[function_calls_in_chunk] Unable to read prompt_function_calls"); + return None; + } + let system_prompt_lines = system_prompt_opt.expect("Empty system_prompt"); + let prompt_json_res = serde_json::from_str(&system_prompt_lines); + if prompt_json_res.is_err() { + log::error!("[FunctionCallIdentifier/new] Unable to deserialize prompt_json: {:?}", + prompt_json_res.expect("Empty prompt_json_res")); + return None; + } + let prompt_json: JsonStructure = prompt_json_res.expect("Empty error in prompt_json_res"); + return Some(Self { prompt: prompt_json, chunk_size: 30}); + } + + pub async fn functions_in_file(&mut self, filepath: &PathBuf, lang: &str) -> Option { + // concatenate functioncallsoutput for all chunks + let mut 
all_func_calls: FunctionCallsOutput = FunctionCallsOutput{ function_calls: vec![], notes: None }; + let file_contents_res = std::fs::read_to_string(filepath.clone()); + if file_contents_res.is_err() { + log::error!( + "[FunctionCallIdentifier/functions_in_file] Unable to read file: {:?}, error: {:?}", + &filepath, file_contents_res.expect_err("Empty error in file_contents_res") + ); + return None; + } + let file_contents = file_contents_res.expect("Uncaught error in file_contents_res"); + let numbered_content = numbered_content(file_contents); + let chunks = numbered_content.chunks(50); + for chunk in chunks { + let chunk_str = chunk.join("\n"); + if let Some(mut func_calls) = self.functions_in_chunk(&chunk_str, filepath, lang).await { + all_func_calls.function_calls.append(&mut func_calls.function_calls); + } + } + if all_func_calls.function_calls.is_empty() { + return None; + } + return Some(all_func_calls); + } + + async fn functions_in_chunk(&mut self, chunk: &str, filepath: &PathBuf, lang: &str) -> Option { + let input = InputSchema{ code_chunk: chunk.to_string(), language: lang.to_string(), + file_path: filepath.to_str().expect("Empty filepath").to_string() }; + self.prompt.input = Some(input); + let prompt_str_res = serde_json::to_string(&self.prompt); + if prompt_str_res.is_err() { + log::error!( + "[FunctionCallIdentifier/functions_in_chunk] Unable to serialize prompt: {:?}", + prompt_str_res.expect_err("Empty error in prompt_str_res")); + return None; + } + let prompt_str = prompt_str_res.expect("Uncaught error in prompt_str_res"); + let final_prompt = format!("{}\nOutput - ", &prompt_str); + let prompt_response_opt = call_llm_api(final_prompt).await; + if prompt_response_opt.is_none() { + log::error!("[FunctionCallIdentifier/functions_in_chunk] Unable to call llm for chunk: {:?}", chunk); + return None; + } + let prompt_response = prompt_response_opt.expect("Empty prompt_response_opt"); + let deserialized_response = 
serde_json::from_str(&prompt_response); + if deserialized_response.is_err() { + let e = deserialized_response.expect_err("Empty error in deserialized_response"); + log::error!("[FunctionCallIdentifier/functions_in_chunk] Error in deserializing response: {:?}", e); + return None; + } + let func_calls: FunctionCallsOutput = deserialized_response.expect("Empty error in deserialized_response"); + return Some(func_calls); + } + + pub async fn function_calls_in_hunks(&mut self, filepath: &PathBuf, lang: &str, diff_hunks: &Vec) -> Option> { + let func_calls_opt = self.functions_in_file(filepath, lang).await; + + if func_calls_opt.is_none() { + log::debug!("[FunctionCallIdentifier/function_calls_in_hunks] No func calls in {:?}", filepath); + return None; + } + + let func_calls = func_calls_opt.expect("Empty func_calls_opt"); + + // Create a vector to store the result (HunkDiffLines, FunctionCallsOutput) tuples + let mut hunk_func_pairs: Vec<(HunkDiffLines, FunctionCallsOutput)> = Vec::new(); + + // For each hunk, find matching function calls + for hunk in diff_hunks { + // Collect function calls within this hunk's line range + let matching_func_calls: Vec = func_calls + .function_calls + .iter() + .filter(|function_call| { + function_call.line_number >= *hunk.start_line() as u32 && function_call.line_number <= *hunk.end_line() as u32 + }) + .cloned() // Clone the function calls so we can move them into the new FunctionCallsOutput + .collect(); + + // If there are any matching function calls, create a FunctionCallsOutput and pair it with the hunk + if !matching_func_calls.is_empty() { + let mut matching_func_calls_output = func_calls.clone(); + matching_func_calls_output.function_calls = matching_func_calls; + + hunk_func_pairs.push((hunk.clone(), matching_func_calls_output)); + } + } + + if hunk_func_pairs.is_empty() { + None + } else { + Some(hunk_func_pairs) + } + } + +} + +pub fn function_calls_search(review: &Review, function_name: &str, lang: &str) -> Option>{ + let 
pattern = format!(r"{}\([^\)]*\)", function_name); // Regex pattern for the specific function call + let directory = review.clone_dir(); // The directory to search in (current directory here) + + // Spawn the ripgrep process, adding `-l` for filenames + let rg_command_res = Command::new("rg") + .arg("-l") // Print only filenames that contain matches + .arg("-e") // Use regular expression + .arg(pattern) // The regex pattern for function calls + .arg(directory) // Directory to search + .stdout(Stdio::piped()) // Pipe the output + .spawn(); // Spawn the ripgrep process + if rg_command_res.is_err() { + log::error!("[function_calls_search] error in rg command: {:?}", + rg_command_res.expect_err("Empty error in rg_command_res")); + return None; + } + let rg_command = rg_command_res.expect("Uncaught error in rg_command_res"); + // Capture the stdout of ripgrep + let stdout = rg_command.stdout.expect("Failed to capture stdout"); + let reader = BufReader::new(stdout); + + // Use a HashSet to avoid duplicate filenames + let mut files: HashSet = HashSet::new(); + + // Read the output line by line + for line in reader.lines() { + if let Ok(file) = line { // Each line is an absolute filename with a match + if let Some(file_lang) = detect_language(&file) { + if lang == &file_lang { + files.insert(file); + } + } + } + } + return Some(files); +} \ No newline at end of file diff --git a/vibi-dpu/src/graph/function_line_range.rs b/vibi-dpu/src/graph/function_line_range.rs new file mode 100644 index 0000000..cdffc5f --- /dev/null +++ b/vibi-dpu/src/graph/function_line_range.rs @@ -0,0 +1,530 @@ +use std::{collections::HashMap, path::PathBuf}; + +use serde::{Deserialize, Serialize}; + +use crate::graph::utils::numbered_content; + +use super::{function_call::FunctionCall, gitops::HunkDiffLines, utils::{call_llm_api, read_file}}; + +#[derive(Debug, Serialize, Default, Deserialize, Clone)] +pub struct FuncDefInfo { + pub(crate) name: String, + pub(crate) line_start: usize, + pub(crate) 
line_end: usize,
    pub(crate) parent: String,
}

/// Two definitions are the same when name and starting line match
/// (line_end/parent are derived and intentionally excluded).
impl PartialEq for FuncDefInfo {
    fn eq(&self, other: &Self) -> bool {
        self.name == other.name && self.line_start == other.line_start
    }
}

impl FuncDefInfo {
    pub fn name(&self) -> &String {
        &self.name
    }

    pub fn line_start(&self) -> &usize {
        &self.line_start
    }

    pub fn line_end(&self) -> &usize {
        &self.line_end
    }

    pub fn parent(&self) -> &String {
        &self.parent
    }
}

/// A function definition paired with the diff hunk it overlaps.
#[derive(Debug, Default, Clone)]
pub struct HunkFuncDef {
    func_def: FuncDefInfo,
    hunk_info: HunkDiffLines,
}

impl HunkFuncDef {
    pub fn func_def(&self) -> &FuncDefInfo {
        &self.func_def
    }
}

// A single function-definition line as reported by the LLM.
#[derive(Serialize, Deserialize, Debug, Default, Clone)]
struct FunctionDefinition {
    line_number: i32,
}

// Output schema of the prompt_function_def template.
#[derive(Serialize, Deserialize, Debug)]
pub struct FunctionDefOutput {
    function_definition: Option<FunctionDefinition>,
    notes: Option<String>,
}

impl FunctionDefOutput {
    /// Line number of the located definition, if any.
    /// NOTE(review): a negative line_number from the LLM would wrap in the
    /// `as usize` cast — confirm responses are validated upstream.
    pub fn get_function_line_number(&self) -> Option<usize> {
        if let Some(func_def) = &self.function_definition {
            return Some(func_def.line_number as usize)
        }
        return None;
    }
}

// Input schema for the prompt_function_def template.
#[derive(Serialize, Deserialize, Debug)]
struct InputSchema {
    code_chunk: String,
    language: String,
    function_name: String,
}

// Prose description of the input schema.
#[derive(Serialize, Deserialize, Debug)]
struct InputSchemaDescription {
    code_chunk: String,
    language: String,
    function_name: String,
}

// Prose description of the output schema.
#[derive(Serialize, Deserialize, Debug)]
pub struct OutputSchemaDescription {
    function_definition: FunctionDefinitionDescription,
    notes: String,
}

#[derive(Serialize, Deserialize, Debug, Default, Clone)]
struct FunctionDefinitionDescription {
    line_number: String,
}

// Instructions holding the input/output schema descriptions.
#[derive(Serialize, Deserialize, Debug)]
struct Instructions {
    input_schema: InputSchemaDescription,
    output_schema: OutputSchemaDescription,
    task_description: String,
}

// The entire JSON prompt template.
#[derive(Serialize, Deserialize, Debug)]
struct FunctionDefPrompt {
    instructions: Instructions,
    sample_input: InputSchema,
    expected_output: FunctionDefOutput,
    input: Option<InputSchema>,
}

impl FunctionDefPrompt {
    fn set_input(&mut self, input: InputSchema) {
        self.input = Some(input);
    }
}

/// Locates a named function's definition by prompting an LLM with the
/// template from /app/prompts/prompt_function_def.
pub struct FunctionDefIdentifier {
    prompt: FunctionDefPrompt,
}

impl FunctionDefIdentifier {
    pub fn new() -> Option<Self> {
        let system_prompt_opt = read_file("/app/prompts/prompt_function_def");
        if system_prompt_opt.is_none() {
            // Bug fix: log tag previously said [function_calls_in_chunk].
            log::error!("[FunctionDefIdentifier/new] Unable to read prompt_function_def");
            return None;
        }
        let system_prompt_lines = system_prompt_opt.expect("Empty system_prompt");
        let prompt_json_res = serde_json::from_str(&system_prompt_lines);
        if prompt_json_res.is_err() {
            // Bug fix: was `.expect(...)` on the Err branch, which panics
            // instead of logging; `.expect_err(...)` retrieves the error.
            log::error!("[FunctionDefIdentifier/new] Unable to deserialize prompt_json: {:?}",
                prompt_json_res.expect_err("Empty prompt_json_res"));
            return None;
        }
        let prompt_json: FunctionDefPrompt = prompt_json_res.expect("Uncaught error in prompt_json_res");
        return Some(Self { prompt: prompt_json });
    }

    /// Scans the file in 50-line numbered chunks; returns the first chunk in
    /// which the LLM reports a definition of `function_name`.
    pub async fn function_defs_in_file(&mut self, filepath: &PathBuf, lang: &str, function_name: &str) -> Option<FunctionDefOutput> {
        let file_contents_res = std::fs::read_to_string(filepath.clone());
        if file_contents_res.is_err() {
            log::error!(
                "[FunctionDefIdentifier/function_defs_in_file] Unable to read file: {:?}, error: {:?}",
                &filepath, file_contents_res.expect_err("Empty error in file_contents_res")
            );
            return None;
        }
        let file_contents = file_contents_res.expect("Uncaught error in file_contents_res");
        let numbered_content = numbered_content(file_contents);
        let chunks = numbered_content.chunks(50);
        for chunk in chunks {
            let chunk_str = chunk.join("\n");
            if let Some(func_defs) = self.function_defs_in_chunk(&chunk_str, filepath, lang, function_name).await {
                return Some(func_defs);
            }
        }
        return None;
    }

    /// Single-chunk LLM round trip. `filepath` is currently unused by the
    /// input schema — kept for interface stability.
    async fn function_defs_in_chunk(&mut self, chunk: &str, filepath: &PathBuf, lang: &str, function_name: &str) -> Option<FunctionDefOutput> {
        let input = InputSchema {
            code_chunk: chunk.to_string(),
            language: lang.to_string(),
            function_name: function_name.to_string(),
        };
        self.prompt.set_input(input);
        let prompt_str_res = serde_json::to_string(&self.prompt);
        if prompt_str_res.is_err() {
            log::error!(
                "[FunctionDefIdentifier/function_defs_in_chunk] Unable to serialize prompt: {:?}",
                prompt_str_res.expect_err("Empty error in prompt_str_res"));
            return None;
        }
        let prompt_str = prompt_str_res.expect("Uncaught error in prompt_str_res");
        let final_prompt = format!("{}\nOutput - ", &prompt_str);
        let prompt_response_opt = call_llm_api(final_prompt).await;
        if prompt_response_opt.is_none() {
            log::error!("[FunctionDefIdentifier/function_defs_in_chunk] Unable to call llm for chunk: {:?}", chunk);
            return None;
        }
        let prompt_response = prompt_response_opt.expect("Empty prompt_response_opt");
        let deserialized_response = serde_json::from_str(&prompt_response);
        if deserialized_response.is_err() {
            let e = deserialized_response.expect_err("Empty error in deserialized_response");
            log::error!("[FunctionDefIdentifier/function_defs_in_chunk] Error in deserializing response: {:?}", e);
            return None;
        }
        let func_def_output: FunctionDefOutput = deserialized_response.expect("Uncaught error in deserialized_response");
        return Some(func_def_output);
    }
}

/// All function definitions found in one file.
#[derive(Debug, Serialize, Default, Deserialize, Clone)]
pub struct FunctionFileMap {
    pub(crate) file_name: String,
    pub(crate) functions: Vec<FuncDefInfo>,
}

impl FunctionFileMap {
    pub fn functions(&self) -> &Vec<FuncDefInfo> {
        &self.functions
    }

    pub fn is_func_in_file(&self, func: &FuncDefInfo) -> bool {
        self.functions.contains(func)
    }

    pub fn func_def(&self, func_name: &str) -> Option<&FuncDefInfo> {
        self.functions.iter().find(|f| f.name == func_name)
    }

    /// The function whose [line_start, line_end] span contains `line_num`.
    pub fn func_at_line(&self, line_num: usize) -> Option<&FuncDefInfo> {
        self.functions.iter().find(
            |f| f.line_start <= line_num && line_num <= f.line_end)
    }

    /// Functions overlapping `hunk` in any way: starting inside it, ending
    /// inside it, or completely spanning it.
    pub fn funcs_in_hunk(&self, hunk: &HunkDiffLines) -> Vec<HunkFuncDef> {
        let hunk_func_vec: Vec<HunkFuncDef> = self.functions
            .iter()
            .filter_map(|func| {
                if (func.line_start() >= hunk.start_line() && func.line_start() <= hunk.end_line()) ||
                    (func.line_end() >= hunk.start_line() && func.line_end() <= hunk.end_line()) ||
                    (func.line_start() <= hunk.start_line() && func.line_end() >= hunk.end_line())
                {
                    let hunkfuncdef = HunkFuncDef {
                        func_def: func.clone(),
                        hunk_info: hunk.clone(),
                    };
                    return Some(hunkfuncdef);
                }
                return None;
            }).collect();
        return hunk_func_vec;
    }

    /// Definition (if any) containing the line on which `func_call` occurs.
    pub fn funcs_for_func_call(&self, func_call: &FunctionCall) -> Option<&FuncDefInfo> {
        let line_num = func_call.line_number().to_owned() as usize;
        return self.func_at_line(line_num);
    }

    // NOTE(review): dead commented-out funcs_for_lines draft removed.
}

/// Per-file function maps, keyed by file path.
#[derive(Debug, Serialize, Default, Deserialize, Clone)]
pub struct AllFileFunctions {
    // NOTE(review): value generic was lost in extraction; keyed by file name
    // per the original comment.
    pub(crate) func_map: HashMap<String, FunctionFileMap>,
}

impl AllFileFunctions {

    pub fn functions_in_file(&self, filename: &str) -> Option<&FunctionFileMap> {
        self.func_map.get(filename)
    }

    pub fn
all_files(&self) -> Vec<&String> {
        self.func_map.keys().collect::<Vec<&String>>()
    }
}

// LLM request payloads for function-definition line detection.
#[derive(Debug, Serialize, Default, Deserialize, Clone)]
struct LlmFuncDefInput {
    language: String,
    chunk: String,
}

#[derive(Debug, Serialize, Default, Deserialize, Clone)]
struct LlmFuncDefRequest {
    input: LlmFuncDefInput,
}

// One detected definition; fields default when the LLM omits them.
#[derive(Debug, Serialize, Default, Deserialize, Clone)]
pub struct LlmFuncDef {
    #[serde(default)]
    name: String,
    #[serde(default)]
    line_start: usize,
    #[serde(default)]
    parent: String,
}

impl LlmFuncDef {
    pub fn name(&self) -> &String {
        &self.name
    }

    pub fn line_start(&self) -> &usize {
        &self.line_start
    }
}

#[derive(Debug, Serialize, Default, Deserialize, Clone)]
struct LlmFuncDefResponse {
    #[serde(default)]
    functions: Vec<LlmFuncDef>,
}

impl LlmFuncDefResponse {
    pub fn functions(&self) -> &Vec<LlmFuncDef> {
        &self.functions
    }
}

// LLM request/response payloads for function-boundary (end line) detection.
#[derive(Debug, Serialize, Default, Deserialize, Clone)]
struct LlmFuncBoundaryInput {
    language: String,
    func_declared: String,
    chunk: String,
}

#[derive(Debug, Serialize, Default, Deserialize, Clone)]
struct LlmFuncBoundaryRequest {
    input: LlmFuncBoundaryInput,
}

#[derive(Debug, Serialize, Default, Deserialize, Clone)]
struct LlmFuncBoundaryResponse {
    function_boundary: i32,
}

/// Builds a FunctionFileMap for every file in `file_paths` by asking the LLM
/// for definition start lines (prompt_function_lines) and then, per
/// definition, its end line (prompt_function_boundary). Unreadable files are
/// skipped; None only when a prompt template can't be read.
pub async fn generate_function_map(file_paths: &Vec<PathBuf>) -> Option<AllFileFunctions> {
    let mut all_file_functions = AllFileFunctions { func_map: HashMap::new() };
    let system_prompt_opt = read_file("/app/prompts/prompt_function_lines");
    if system_prompt_opt.is_none() {
        log::error!("[generate_function_map] Unable to read prompt_function_lines");
        return None;
    }
    let system_prompt_lines = system_prompt_opt.expect("Empty system_prompt");
    let system_prompt_end_opt = read_file("/app/prompts/prompt_function_boundary");
    if system_prompt_end_opt.is_none() {
        log::error!("[generate_function_map] Unable to read prompt_function_boundary");
        return None;
    }
    let system_prompt_lines_end = system_prompt_end_opt.expect("Empty system_prompt");
    for path in file_paths {
        log::debug!("[generate_function_map] path = {:?}", path);
        let mut function_map = FunctionFileMap {
            file_name: path.to_str().to_owned().unwrap_or("").to_string(),
            functions: Vec::new(),
        };
        let file_contents_res = std::fs::read_to_string(path.clone());
        if file_contents_res.is_err() {
            log::error!("[generate_function_map] Error in reading file contents: {:?}",
                file_contents_res.expect_err("Empty error"));
            continue;
        }
        let file_contents = file_contents_res.expect("Uncaught error in file_content_res");
        let numbered_content = numbered_content(file_contents);
        let chunks = numbered_content.chunks(30);
        for chunk in chunks {
            let chunk_str = chunk.join("\n");
            let function_defs_opt = get_function_defs_in_chunk(&chunk_str, &system_prompt_lines).await;
            if function_defs_opt.is_none() {
                log::error!("[generate_function_map] Unable to get functions from llm");
                continue;
            }
            let function_defs = function_defs_opt.expect("Empty function_defs");
            for func_def in function_defs.functions.iter() {
                // Defs without a name are LLM noise — skip.
                if func_def.name.len() == 0 {
                    log::debug!("[generate_function_map] No valid name for func_def {:?}", &func_def);
                    continue;
                }
                let func_boundary_opt = get_function_boundaries_in_chunk(&numbered_content, func_def.line_start, &system_prompt_lines_end).await;
                if func_boundary_opt.is_none() {
                    log::debug!("[generate_function_map] No function end detected for func: {:?}", &func_def);
                    continue;
                }
                let func_boundary = func_boundary_opt.expect("Empty func_boundary_opt");
                function_map.functions.push(FuncDefInfo {
                    name: func_def.name.clone(),
                    line_start: func_def.line_start,
                    line_end: func_boundary.function_boundary as usize,
                    parent: func_def.parent.clone(),
                });
            }
        }
        log::debug!("[generate_function_map] func_map = {:#?}", &function_map);
        // NOTE(review): `unwrap` panics on non-UTF-8 paths while the same
        // path yielded "" above — consider unifying. Behavior kept as-is.
        all_file_functions.func_map.insert(path.to_str().unwrap().to_string(), function_map);
    }
    return Some(all_file_functions);
}

/// Walks upward from `func_call_line_num` in 30-line chunks until a function
/// definition enclosing the call site is found.
/// NOTE(review): assumes func_call_line_num < number of lines in the file —
/// the slice below panics otherwise; confirm callers guarantee this.
pub async fn get_function_def_for_func_call(filepath: &PathBuf, func_call_line_num: usize) -> Option<LlmFuncDef> {
    let system_prompt_opt = read_file("/app/prompts/prompt_function_lines");
    if system_prompt_opt.is_none() {
        log::error!("[get_function_def_for_func_call] Unable to read prompt_function_lines");
        return None;
    }
    let system_prompt_lines = system_prompt_opt.expect("Empty system_prompt");
    let file_contents_res = std::fs::read_to_string(filepath.clone());
    if file_contents_res.is_err() {
        log::error!("[get_function_def_for_func_call] Error in reading file contents: {:?}",
            file_contents_res.expect_err("Empty error"));
        return None;
    }
    let file_contents = file_contents_res.expect("Uncaught error in file_content_res");
    let numbered_content = numbered_content(file_contents);
    let mut current_line = func_call_line_num;
    let chunk_size = 30;
    // Walk toward the beginning of the file, one chunk at a time.
    while current_line > 0 {
        let start = if current_line >= chunk_size {
            current_line - chunk_size
        } else {
            0
        };
        let chunk_str: String = numbered_content[start..=current_line].join("\n");
        let function_defs_opt = get_function_defs_in_chunk(&chunk_str, &system_prompt_lines).await;
        if let Some(function_defs) = function_defs_opt {
            if let Some(func_def) = function_defs.functions().first() {
                return Some(func_def.to_owned());
            }
        } else {
            // Bug fix: this branch previously did `continue;` without
            // advancing current_line, spinning forever on a failed LLM call.
            log::error!("[get_function_def_for_func_call] Unable to get functions from llm");
        }
        // Move the window up by one chunk.
        current_line = start;
    }
    return None;
}

/// Single-chunk LLM round trip for definition start lines.
async fn get_function_defs_in_chunk(chunk: &str, system_prompt: &str) -> Option<LlmFuncDefResponse> {
    let llm_req = LlmFuncDefRequest {
        input: LlmFuncDefInput {
            language: "rust".to_string(), // NOTE(review): hard-coded language — confirm
            chunk: chunk.to_string()
        }
    };
    let llm_req_res = serde_json::to_string(&llm_req);
    if llm_req_res.is_err() {
        log::error!("[get_function_defs_in_chunk] Error in serializing llm req: {}", llm_req_res.expect_err("Empty error in llm_req_res"));
        return None;
    }
    let llm_req_prompt = llm_req_res.expect("Uncaught error in llm_req_res");
    let prompt = format!("{}\n\n### User Message\nInput -\n{}\nOutput -", system_prompt, llm_req_prompt);
    match call_llm_api(prompt).await {
        None => {
            log::error!("[get_function_defs_in_chunk] Failed to call LLM API");
            return None;
        }
        Some(llm_response) => {
            let funcdefs_res = serde_json::from_str(&llm_response);
            if funcdefs_res.is_err() {
                log::error!(
                    "[get_function_defs_in_chunk] funcdefs error: {:?}",
                    funcdefs_res.expect_err("Empty error in funcdefs_res"));
                return None;
            }
            let funcdefs: LlmFuncDefResponse = funcdefs_res.expect("Uncaught error in funcdefs_res");
            return Some(funcdefs);
        }
    }
}

/// Finds the end line of the function declared at `func_def_line_num` by
/// scanning forward in 40-line chunks.
/// NOTE(review): this definition continues past the end of this file chunk;
/// the visible portion is reproduced with generics restored only.
async fn get_function_boundaries_in_chunk(file_lines_numbered: &Vec<String>, func_def_line_num: usize, system_prompt: &str) -> Option<LlmFuncBoundaryResponse> {
    // divide lines into chunks and call with each chunk until line_end is found or files is empty
    let chunk_size = 40;
    let mut start = func_def_line_num;

    while start < file_lines_numbered.len() {
        let end = std::cmp::min(start + chunk_size, file_lines_numbered.len());
        let chunk: Vec<String> = file_lines_numbered[start..end].to_vec();
        let chunk_str = chunk.join("\n");

        let input = LlmFuncBoundaryInput {
            language: "rust".to_string(), // Assuming Rust as language, you can modify this as needed
            func_declared: file_lines_numbered[func_def_line_num].to_string(),
            chunk: chunk_str,
        };
        let llm_req = LlmFuncBoundaryRequest { input };
        let llm_req_res = serde_json::to_string(&llm_req);
        if llm_req_res.is_err() {
            log::error!("[get_function_boundaries_in_chunk] 
Error in serializing llm req: {}", llm_req_res.expect_err("Empty error in llm_req_res")); + return None; + } + let llm_req_prompt = llm_req_res.expect("Uncaught error in llm_req_res"); + let prompt = format!("{}\n\n### User Message\nInput -\n{}\nOutput -", system_prompt, llm_req_prompt); + match call_llm_api(prompt).await { + None => { + log::error!("[get_function_boundaries_in_chunk] Failed to call LLM API"); + return None; + } + Some(llm_response) => { + let func_resp_res = serde_json::from_str(&llm_response); + if func_resp_res.is_err() { + let e = func_resp_res.expect_err("Empty error func_resp_res"); + log::error!("[get_function_boundaries_in_chunk] Unable to deserialize response"); + return None; + } + let func_resp: LlmFuncBoundaryResponse = func_resp_res.expect("Uncaught error in func_resp_res"); + if func_resp.function_boundary == -1 { + start += chunk_size; + continue; + } + return Some(func_resp); + } + } + // let func_resp = LlmFuncBoundaryResponse { function_boundary: 79 }; + // if func_resp.function_boundary == -1 { + // start += chunk_size; + // continue; + // } + // return Some(func_resp); + } + return None; +} \ No newline at end of file diff --git a/vibi-dpu/src/graph/function_name.rs b/vibi-dpu/src/graph/function_name.rs new file mode 100644 index 0000000..d0e4656 --- /dev/null +++ b/vibi-dpu/src/graph/function_name.rs @@ -0,0 +1,103 @@ +use std::collections::HashMap; + +use serde::{Deserialize, Serialize}; +use super::utils::{call_llm_api, read_file}; + +// Struct to represent the output schema +#[derive(Serialize, Deserialize, Debug)] +pub struct FunctionNameOutput { + function_name: String, + notes: Option, +} + +impl FunctionNameOutput { + pub fn get_function_name(&self) -> &String { + &self.function_name + } +} + +// Struct to represent the input schema +#[derive(Serialize, Deserialize, Debug)] +struct InputSchema { + code_line: String, + language: String, +} + +// Struct for instructions that hold input/output schemas +#[derive(Serialize, 
Deserialize, Debug)] +struct Instructions { + input_schema: InputSchema, + output_schema: FunctionNameOutput, + task_description: String, +} +// Struct for the entire JSON prompt +#[derive(Serialize, Deserialize, Debug)] +struct FunctionNamePrompt { + instructions: Instructions, + sample_input: InputSchema, + expected_output: FunctionNameOutput, + input: Option, +} + +impl FunctionNamePrompt { + fn set_input(&mut self, input: InputSchema) { + self.input = Some(input); + } +} + +pub struct FunctionNameIdentifier { + prompt: FunctionNamePrompt, + cached_output: HashMap +} + +impl FunctionNameIdentifier { + pub fn new() -> Option { + let system_prompt_opt = read_file("/app/prompts/prompt_function_name"); + if system_prompt_opt.is_none() { + log::error!("[FunctionNameIdentifier/new] Unable to read prompt_function_name"); + return None; + } + let system_prompt_lines = system_prompt_opt.expect("Empty system_prompt"); + let prompt_json_res = serde_json::from_str(&system_prompt_lines); + if prompt_json_res.is_err() { + log::error!("[FunctionNameIdentifier/new] Unable to deserialize prompt_json: {:?}", + prompt_json_res.expect("Empty prompt_json_res")); + return None; + } + let prompt_json: FunctionNamePrompt = prompt_json_res.expect("Empty error in prompt_json_res"); + return Some(Self { prompt: prompt_json, cached_output: HashMap::new()}); + } + + pub async fn function_name_in_line(&mut self, code_line: &str, lang: &str) -> Option { + // concatenate functioncallsoutput for all chunks + if let Some(cached_func_name) = self.cached_output.get(code_line) { + return Some(FunctionNameOutput{ function_name: cached_func_name.to_string(), notes: None }) + } + let input = InputSchema{ code_line: code_line.to_string(), language: lang.to_string() }; + self.prompt.input = Some(input); + let prompt_str_res = serde_json::to_string(&self.prompt); + if prompt_str_res.is_err() { + log::error!( + "[FunctionNameIdentifier/function_name_in_line] Unable to serialize prompt: {:?}", + 
prompt_str_res.expect_err("Empty error in prompt_str_res")); + return None; + } + let prompt_str = prompt_str_res.expect("Uncaught error in prompt_str_res"); + let final_prompt = format!("{}\nOutput - ", &prompt_str); + let prompt_response_opt = call_llm_api(final_prompt).await; + if prompt_response_opt.is_none() { + log::error!("[FunctionNameIdentifier/function_name_in_line] Unable to call llm for code line: {:?}", code_line); + return None; + } + let prompt_response = prompt_response_opt.expect("Empty prompt_response_opt"); + let deserialized_response = serde_json::from_str(&prompt_response); + if deserialized_response.is_err() { + let e = deserialized_response.expect_err("Empty error in deserialized_response"); + log::error!("[FunctionNameIdentifier/function_name_in_line] Error in deserializing response: {:?}", e); + return None; + } + let func_calls: FunctionNameOutput = deserialized_response.expect("Empty error in deserialized_response"); + self.cached_output.insert(code_line.to_string(), func_calls.get_function_name().to_string()); + return Some(func_calls); + } +} \ No newline at end of file diff --git a/vibi-dpu/src/graph/gitops.rs b/vibi-dpu/src/graph/gitops.rs new file mode 100644 index 0000000..86e8c85 --- /dev/null +++ b/vibi-dpu/src/graph/gitops.rs @@ -0,0 +1,281 @@ +use std::{collections::HashMap, path::{Path, PathBuf}, process::Command, str::{self, FromStr}}; + +use crate::utils::{gitops::StatItem, review::Review}; + +#[derive(Debug, Default, Clone)] +pub struct HunkDiffLines { + start_line: usize, + end_line: usize, + function_line: Option, + line_number: Option, + function_name: Option +} + +impl HunkDiffLines { + pub fn start_line(&self) -> &usize { + &self.start_line + } + + pub fn end_line(&self) -> &usize { + &self.end_line + } + + pub fn function_line(&self) -> &Option { + &self.function_line + } + + pub fn function_name(&self) -> &Option { + &self.function_name + } + + pub fn line_number(&self) -> &Option { + &self.line_number + } + + pub fn 
set_line_number(&mut self, line_number: Option) { + self.line_number = line_number; + } + + pub fn set_function_name(&mut self, function_name: String) { + self.function_name = Some(function_name); + } +} + +#[derive(Debug, Default, Clone)] +pub struct FileHunks { + deleted_hunks: Vec, + added_hunks: Vec +} + +impl FileHunks { + pub fn deleted_hunks(&self) -> &Vec { + &self.deleted_hunks + } + + pub fn added_hunks(&self) -> &Vec { + &self.added_hunks + } + + // Mutable references to allow modification of the hunks + pub fn deleted_hunks_mut(&mut self) -> &mut Vec { + &mut self.deleted_hunks + } + + pub fn added_hunks_mut(&mut self) -> &mut Vec { + &mut self.added_hunks + } + + pub fn is_func_in_hunks(&self, function_name: &str) -> &Option { + for hunk_lines in self.added_hunks() { + if let Some(func_raw) = hunk_lines.function_line() { + if func_raw.contains(function_name) { + return hunk_lines.line_number(); + } + } + } + for hunk_lines in self.deleted_hunks() { + if let Some(func_raw) = hunk_lines.function_line() { + if func_raw.contains(function_name) { + return hunk_lines.line_number(); + } + } + } + return &None; + } +} + +#[derive(Debug, Default, Clone)] +pub struct HunkDiffMap { + file_line_map: HashMap, +} + +impl HunkDiffMap { + pub fn file_line_map(&self) -> &HashMap { + &self.file_line_map + } + + pub fn file_line_map_mut(&mut self) -> &mut HashMap { + &mut self.file_line_map + } + + pub fn all_files(&self) -> Vec<&String> { + self.file_line_map.keys().collect::>() + } + + pub fn all_files_pathbuf(&self, clone_dir: &str) -> Vec { + let base_path = Path::new(clone_dir); + self.file_line_map.keys() + .filter_map(|s| { + let relative_path = Path::new(s); + let abs_filepath = base_path.join(relative_path); + Some(abs_filepath) + }) + .collect::>() + } + + pub fn file_hunks(&self, filename: &str) -> Option<&FileHunks> { + self.file_line_map.get(filename) + } +} + +pub fn get_changed_hunk_lines(diff_files: &Vec, review: &Review) -> HunkDiffMap { + let mut 
file_hunk_map = HunkDiffMap { file_line_map: HashMap::new() }; + let prev_commit = review.base_head_commit(); + let curr_commit = review.pr_head_commit(); + let clone_dir = review.clone_dir(); + + for item in diff_files { + let filepath = item.filepath.as_str(); + let commit_range = format!("{}...{}", prev_commit, curr_commit); + log::debug!("[get_changed_hunk_lines] | clone_dir = {:?}, filepath = {:?}", clone_dir, filepath); + + let output_res = Command::new("git") + .arg("diff") + .arg("--unified=0") + .arg("--ignore-space-change") + .arg(&commit_range) + .arg(&filepath) + .current_dir(clone_dir) + .output(); + + if output_res.is_err() { + let commanderr = output_res.expect_err("No error in output_res"); + log::error!("[get_changed_hunk_lines] git diff command failed to start : {:?}", commanderr); + continue; + } + + let result = output_res.expect("Uncaught error in output_res"); + let diff = result.stdout; + let diffstr_res = std::str::from_utf8(&diff); + + if diffstr_res.is_err() { + let e = diffstr_res.expect_err("No error in diffstr_res"); + log::error!("[get_changed_hunk_lines] Unable to deserialize diff: {:?}", e); + continue; + } + + let diffstr = diffstr_res.expect("Uncaught error in diffstr_res"); + log::debug!("[get_changed_hunk_lines] diffstr = {}", &diffstr); + + let mut current_add_start = 0; + let mut current_del_start = 0; + let mut current_add_end = 0; + let mut current_del_end = 0; + let mut in_add_hunk = false; + let mut in_del_hunk = false; + let mut file_hunks = FileHunks { + deleted_hunks: Vec::new(), + added_hunks: Vec::new(), + }; + + // Variable to store the function line + let mut function_line: Option = None; + + for line in diffstr.lines() { + if line.starts_with("@@") { + // Save previous hunks if any + if in_add_hunk { + file_hunks.added_hunks.push(HunkDiffLines { + start_line: current_add_start, + end_line: current_add_end, + function_line: function_line.clone(), // Use the function line stored + line_number: None, + function_name: 
None + }); + } + if in_del_hunk { + file_hunks.deleted_hunks.push(HunkDiffLines { + start_line: current_del_start, + end_line: current_del_end, + function_line: function_line.clone(), // Use the function line stored + line_number: None, + function_name: None + }); + } + + // Reset states for next hunk + in_add_hunk = false; + in_del_hunk = false; + + // Extract the function name or any string after the last @@ + if let Some(pos) = line.rfind("@@ ") { + function_line = Some(line[(pos+3)..].to_string()); + } else { + function_line = None; // Reset if no valid function line found + } + let parts: Vec<&str> = line.split_whitespace().collect(); + // Determine the start and end lines for the hunks + let del_hunk = parts.get(1); + let add_hunk = parts.get(2); + + if let Some(del_hunk) = del_hunk { + if del_hunk.starts_with('-') { + if let Some((start, len)) = parse_hunk_range(del_hunk) { + current_del_start = start; + current_del_end = start + len - 1; + in_del_hunk = true; + } + } + } + + if let Some(add_hunk) = add_hunk { + if add_hunk.starts_with('+') { + if let Some((start, len)) = parse_hunk_range(add_hunk) { + current_add_start = start; + current_add_end = start + len - 1; + in_add_hunk = true; + } + } + } + } + } + + // Push the last hunks if still in any hunk + if in_add_hunk { + file_hunks.added_hunks.push(HunkDiffLines { + start_line: current_add_start, + end_line: current_add_end, + function_line: function_line.clone(), // Use the function line stored + line_number: None, + function_name: None + }); + } + if in_del_hunk { + file_hunks.deleted_hunks.push(HunkDiffLines { + start_line: current_del_start, + end_line: current_del_end, + function_line: function_line.clone(), // Use the function line stored + line_number: None, + function_name: None + }); + } + + let abs_filepath = Path::new(review.clone_dir()); + let abs_file_pathbuf = abs_filepath.join(Path::new(filepath)); + file_hunk_map.file_line_map.insert( + abs_file_pathbuf.to_str().expect("Unable to 
deserialize pathbuf").to_string(), + file_hunks, + ); + } + + return file_hunk_map; +} + + +fn parse_hunk_range(hunk: &str) -> Option<(usize, usize)> { + let hunk = hunk.trim_start_matches(&['-', '+'][..]); + let parts: Vec<&str> = hunk.split(',').collect(); + if parts.len() == 1 { + if let Ok(start) = parts[0].parse::() { + return Some((start, 1)); + } + } else if parts.len() == 2 { + if let (Ok(start), Ok(len)) = (parts[0].parse::(), parts[1].parse::()) { + if len == 0 { + return None; + } + return Some((start, len)); + } + } + None +} diff --git a/vibi-dpu/src/graph/graph_edges.rs b/vibi-dpu/src/graph/graph_edges.rs new file mode 100644 index 0000000..ea46478 --- /dev/null +++ b/vibi-dpu/src/graph/graph_edges.rs @@ -0,0 +1,459 @@ +use std::path::{Path, PathBuf}; +use crate::utils::{gitops::git_checkout_commit, review::Review}; + +use super::{elements::MermaidGraphElements, file_imports::ImportIdentifier, function_call::{function_calls_search, FunctionCallIdentifier}, function_line_range::{generate_function_map, get_function_def_for_func_call, FunctionDefIdentifier}, graph_info::DiffGraph, utils::{absolute_to_relative_path, detect_language}}; + +pub async fn graph_edges(base_filepaths: &Vec, review: &Review, diff_graph: &DiffGraph, graph_elems: &mut MermaidGraphElements) { + outgoing_edges(base_filepaths, diff_graph, graph_elems, review).await; + incoming_edges(review, diff_graph, graph_elems).await; +} + +async fn incoming_edges(review: &Review, diff_graph: &DiffGraph, graph_elems: &mut MermaidGraphElements) { + // filter files with ripgrep + // for each filtered file + // get func call + // get func def + // for (dest_filename, func_defs) in diff_graph.diff_func_defs() { + // let mut dest_file_rel = dest_filename.to_string(); + // if let Some(dest_file_relative_path) = absolute_to_relative_path(&dest_filename, review) { + // dest_file_rel = dest_file_relative_path; + // } + // let all_files: Vec<&String> = diff_graph.diff_func_defs().keys().collect(); + // for 
(source_filename, file_func_defs) in diff_graph.all_file_imports().file_import_map() { + // let mut source_rel_path = source_filename.to_string(); + // if let Some(src_relative_filepath) = absolute_to_relative_path(&source_rel_path, review) { + // source_rel_path = src_relative_filepath; + // } + // let file_imports = file_func_defs.all_import_paths(); + // for file_import in file_imports { + // // search for correct import + // if let Some(dest_filepath) = match_imported_filename_to_path(head_filepaths, &file_import.import_path()) { + // if match_import_func(&file_import, dest_func) { + // // find func call + // let src_filepath = PathBuf::from_str(source_filename).expect("Unable to create pathbuf"); + // // TODO, FIXME - function_calls_in_file should have src_filename or src_filepath? - check other calls to the function as well + // if let Some(func_call_vec) = function_calls_in_file(&src_filepath, dest_func.name()).await { + // // call func in that takes vec of lines and returns funcdefs + // let lines = func_call_vec.iter().flat_map(|chunk| chunk.function_calls()).cloned().collect(); + // let source_func_defs = diff_graph.all_file_func_defs().functions_in_file(source_filename).expect("No source filename found").funcs_for_lines(&lines); + // for (line_num, source_func_def) in source_func_defs { + // if source_func_def != dest_func.to_owned() { + // graph_elems.add_edge("", + // line_num.to_owned(), + // &source_func_def.name(), + // &dest_func.name(), + // &source_rel_path, + // &dest_file_rel, + // "", + // "green", + // source_func_def.line_start(), + // dest_func.line_start() + // ); + // } + // } + // } + // } + // } + // } + // } + + // // search in full graph + // for (source_filename, file_func_defs) in all_import_info.file_import_map() { + // let mut source_file_rel = source_filename.to_string(); + // if let Some(src_relative_filepath) = absolute_to_relative_path(&source_file_rel, review) { + // source_file_rel = src_relative_filepath; + // } + // let 
file_imports = file_func_defs.all_import_paths(); + // for file_import in file_imports { + // // search for correct import + // if let Some(dest_filepath) = match_imported_filename_to_path(head_filepaths, file_import.import_path()) { + // if match_import_func(&file_import, dest_func) { + // // if found, create edge + // let src_filepath = PathBuf::from_str(source_filename).expect("Unable to create pathbuf"); + // if let Some(func_call_vec) = function_calls_in_file(&src_filepath, dest_func.name()).await { + // // call func in that takes vec of lines and returns funcdefs + // let lines = func_call_vec.iter().flat_map(|chunk| chunk.function_calls()).cloned().collect(); + // let source_func_defs_opt = diff_graph.all_file_func_defs().functions_in_file(source_filename); + // if source_func_defs_opt.is_none() { + // log::debug!("[incoming_edges] No funcs for file: {}", source_filename); + // continue; + // } + // let source_func_defs = source_func_defs_opt.expect("No source filename found").funcs_for_lines(&lines); + // for (line_num, source_func_def) in source_func_defs { + // if source_func_def != dest_func.to_owned() { + // graph_elems.add_edge("", + // line_num.to_owned(), + // &source_func_def.name(), + // &dest_func.name(), + // &source_file_rel, + // &dest_file_rel, + // "", + // "green", + // source_func_def.line_start(), + // dest_func.line_start() + // ); + // } + // } + // } + // } + // } + // } + // } + // } + // for dest_func in func_defs.deleted_func_defs() { + // // search in diff graph + // for (source_filename, file_func_defs) in diff_graph.all_file_imports().file_import_map() { + // let mut source_file_rel = source_filename.to_string(); + // if let Some(src_relative_filepath) = absolute_to_relative_path(&source_file_rel, review) { + // source_file_rel = src_relative_filepath; + // } + // let file_imports = file_func_defs.all_import_paths(); + // for file_import in file_imports { + // // search for correct import + // if let Some(dest_filepath) = 
match_imported_filename_to_path(head_filepaths, file_import.import_path()) { + // if match_import_func(&file_import, dest_func) { + // // find func call + // git_checkout_commit(review, review.pr_head_commit()); + // let src_filepath = PathBuf::from_str(source_filename).expect("Unable to create pathbuf"); + // if let Some(func_call_vec) = function_calls_in_file(&src_filepath, dest_func.name()).await { + // // call func in that takes vec of lines and returns funcdefs + // let lines = func_call_vec.iter().flat_map(|chunk| chunk.function_calls()).cloned().collect(); + // let source_func_defs_opt = diff_graph.all_file_func_defs().functions_in_file(source_filename); + // if source_func_defs_opt.is_none() { + // log::debug!("[incoming_edges] No funcs for file: {}", source_filename); + // continue; + // } + // let source_func_defs = source_func_defs_opt.expect("No source filename found").funcs_for_lines(&lines); + // for (line_num, source_func_def) in source_func_defs { + // if source_func_def != dest_func.to_owned() { + // graph_elems.add_edge("", + // line_num.to_owned(), + // &source_func_def.name(), + // &dest_func.name(), + // &source_file_rel, + // &dest_file_rel, + // "", + // "red", + // source_func_def.line_start(), + // dest_func.line_start() + // ); + // } + // } + // } + // } + // } + // } + // } + // // search in full graph + // for (source_filename, file_func_defs) in all_import_info.file_import_map() { + // let mut source_file_rel = source_filename.to_string(); + // if let Some(src_relative_filepath) = absolute_to_relative_path(&source_file_rel, review) { + // source_file_rel = src_relative_filepath; + // } + // let file_imports = file_func_defs.all_import_paths(); + // for file_import in file_imports { + // // search for correct import + // if let Some(dest_filepath) = match_imported_filename_to_path(head_filepaths, file_import.import_path()) { + // if match_import_func(&file_import, dest_func) { + // // if found, create edge + // let src_filepath = 
PathBuf::from_str(source_filename).expect("Unable to create pathbuf"); + // if let Some(func_call_vec) = function_calls_in_file(&src_filepath, dest_func.name()).await { + // // call func in that takes vec of lines and returns funcdefs + // let lines = func_call_vec.iter().flat_map(|chunk| chunk.function_calls()).cloned().collect(); + // let source_func_defs_opt = diff_graph.all_file_func_defs().functions_in_file(source_filename); + // if source_func_defs_opt.is_none() { + // log::debug!("[incoming_edges] No funcs for file: {}", source_filename); + // continue; + // } + // let source_func_defs = source_func_defs_opt.expect("No source filename found").funcs_for_lines(&lines); + // for (line_num, source_func_def) in source_func_defs { + // if source_func_def != dest_func.to_owned() { + // graph_elems.add_edge("red", + // line_num.to_owned(), + // &source_func_def.name(), + // &dest_func.name(), + // &source_file_rel, + // &dest_file_rel, + // "", + // "red", + // source_func_def.line_start(), + // dest_func.line_start() + // ); + // } + // } + // } + // } + // } + // } + // } + // } + // } + let func_def_identifier_opt = FunctionDefIdentifier::new(); + if func_def_identifier_opt.is_none() { + log::debug!("[outgoing_edges] Unable to create func def identifier"); + return; + } + let mut funcdef_identifier = func_def_identifier_opt.expect("Empty func_def_identifier_opt"); + let func_call_identifier_opt = FunctionCallIdentifier::new(); + if func_call_identifier_opt.is_none() { + log::error!("[incoming_edges] Unable to create new FunctionCallIdentifier"); + return; + } + let mut func_call_identifier = func_call_identifier_opt.expect("Empty func_call_identifier_opt"); + git_checkout_commit(review, review.pr_head_commit()); + process_func_defs( + review, + &mut funcdef_identifier, + diff_graph, + &mut func_call_identifier, + graph_elems, + "green" + ).await; + git_checkout_commit(review, review.base_head_commit()); + process_func_defs( + review, + &mut funcdef_identifier, + 
diff_graph, + &mut func_call_identifier, + graph_elems, + "red" + ).await; +} + +// fn match_import_func(import_obj: &ImportPath, dest_func_info: &FuncDefInfo) -> bool { +// log::debug!("[match_import_condition] import_obj.imported = {}, dest_func_info = {:#?}", import_obj.imported(), dest_func_info); +// // TODO FIXME - first condition doesn't make sense, it should always be true? - have to check for all calls of this function +// match_overlap(&dest_func_info.name(), +// &import_obj.imported(), +// 0.6) +// || match_overlap(&dest_func_info.parent(), +// &import_obj.imported(), +// 0.6) +// } + +async fn outgoing_edges(base_filepaths: &Vec, diff_graph: &DiffGraph, + graph_elems: &mut MermaidGraphElements, review: &Review) +{ + let func_call_identifier_opt = FunctionCallIdentifier::new(); + if func_call_identifier_opt.is_none() { + log::error!("[incoming_edges] Unable to create new FunctionCallIdentifier"); + return; + } + let mut func_call_identifier = func_call_identifier_opt.expect("Empty func_call_identifier_opt"); + let import_identifier_opt = ImportIdentifier::new(); + if import_identifier_opt.is_none() { + log::debug!("[outgoing_edges] Unable to create import identifier"); + return; + } + let mut import_identifier = import_identifier_opt.expect("Empty import_identifier_opt"); + let func_def_identifier_opt = FunctionDefIdentifier::new(); + if func_def_identifier_opt.is_none() { + log::debug!("[outgoing_edges] Unable to create func def identifier"); + return; + } + let mut funcdef_identifier = func_def_identifier_opt.expect("Empty func_def_identifier_opt"); + git_checkout_commit(review, review.pr_head_commit()); + process_func_calls( + &mut import_identifier, + &mut func_call_identifier, + &mut funcdef_identifier, + review, + diff_graph, + base_filepaths, + graph_elems, + "green").await; + git_checkout_commit(review, review.base_head_commit()); + process_func_calls(&mut import_identifier, + &mut func_call_identifier, + &mut funcdef_identifier, + review, + 
diff_graph, + base_filepaths, + graph_elems, + "red").await; +} + +async fn process_func_calls(import_identifier: &mut ImportIdentifier, func_call_identifier: &mut FunctionCallIdentifier, + funcdef_identifier: &mut FunctionDefIdentifier, + review: &Review, diff_graph: &DiffGraph, base_filepaths: &Vec, + graph_elems: &mut MermaidGraphElements, edge_color: &str) +{ + for (source_filepath, src_file_hunks) in diff_graph.hunk_diff_map().file_line_map() { + let mut source_file_name = source_filepath.to_owned(); + // get func calls + if let Some(source_file) = absolute_to_relative_path(source_filepath, review) { + source_file_name = source_file.clone(); + } + let diff_hunks; + if edge_color == "green" { + diff_hunks = src_file_hunks.added_hunks(); + } else { + diff_hunks = src_file_hunks.deleted_hunks(); + } + let lang_opt = detect_language(source_filepath); + if lang_opt.is_none() { + log::error!("[get_import_path_file] Unable to determine language: {}", source_filepath); + return; + } + let lang = lang_opt.expect("Empty lang_opt"); + let source_file_path = Path::new(source_filepath); + let source_file_pathbuf = source_file_path.to_path_buf(); + if let Some(hunk_func_calls) = func_call_identifier. 
+ function_calls_in_hunks(&source_file_pathbuf, &lang, diff_hunks).await { + for (hunk_lines, func_call_output) in hunk_func_calls { + for dest_func_call in func_call_output.function_calls() { + if let Some(import_filepath) = import_identifier.get_import_path_file( + source_filepath, &lang, dest_func_call.function_name()).await { + // get file + // get diffgraph all files and see if they contain filepath + let possible_diff_file_paths: Vec<&String> = diff_graph.hunk_diff_map().all_files().into_iter() + .filter(|file_path| file_path.contains(import_filepath.get_matching_import().possible_file_path())).collect(); + if !possible_diff_file_paths.is_empty() { + for possible_diff_file_path in possible_diff_file_paths { + if diff_graph.hunk_diff_map().all_files().contains(&possible_diff_file_path) + { + let hunks_for_func = diff_graph.hunk_diff_map().file_line_map() + .get(possible_diff_file_path).expect("Empty entry in file_line_map"); + if let Some(possible_file_rel) = absolute_to_relative_path(possible_diff_file_path, review) { + if let Some(dest_func_def_line) = hunks_for_func.is_func_in_hunks(dest_func_call.function_name()) { + if let Some(src_func_name) = hunk_lines.function_line() { + if let Some(src_func_line_number) = hunk_lines.line_number() { + graph_elems.add_edge( + edge_color, + dest_func_call.line_number().to_owned() as usize, + src_func_name, + dest_func_call.function_name(), + &source_file_name, + &possible_file_rel, + edge_color, + "", + src_func_line_number, + dest_func_def_line); + } + } + } + } + } + } + } else { + // search all files + // TODO - see if git checkout is needed + let possible_file_pathbufs: Vec<&PathBuf> = base_filepaths.iter() + .filter(|file_path| + file_path.to_string_lossy().contains(import_filepath.get_matching_import().possible_file_path())).collect(); + if !possible_file_pathbufs.is_empty() { + for possible_file_pathbuf in possible_file_pathbufs { + let possible_file_path: String = 
possible_file_pathbuf.to_string_lossy().to_string(); + // search only for func def with specific name + // if something comes up, add edge! + if let Some(func_defs) = funcdef_identifier.function_defs_in_file( + possible_file_pathbuf, &lang, dest_func_call.function_name()).await { + if let Some(dest_func_def_line) = func_defs.get_function_line_number() { + if let Some(src_func_name) = hunk_lines.function_line() { + if let Some(src_func_line_number) = hunk_lines.line_number() { + if let Some(possible_file_rel) = + absolute_to_relative_path(&possible_file_path, review) { + graph_elems.add_edge( + edge_color, + dest_func_call.line_number().to_owned() as usize, + src_func_name, + dest_func_call.function_name(), + &source_file_name, + &possible_file_rel, + edge_color, + "", + src_func_line_number, + &dest_func_def_line); + } + } + } + } + } + } + } + } + } + } + } + } + } + // get import and path +} + +async fn process_func_defs(review: &Review, funcdef_identifier: &mut FunctionDefIdentifier, + diff_graph: &DiffGraph, func_call_identifier: &mut FunctionCallIdentifier, + graph_elems: &mut MermaidGraphElements, edge_color: &str) +{ + for (dest_filename, dest_file_hunks) in diff_graph.hunk_diff_map().file_line_map() { + let dest_lang_opt = detect_language(&dest_filename); + if dest_lang_opt.is_none() { + log::error!("[process_func_defs] Unable to detect language: {}", dest_filename); + continue; + } + let dest_lang = dest_lang_opt.expect("Empty dest_lang_opt"); + let func_defs; + if edge_color == "red" { + func_defs = dest_file_hunks.deleted_hunks(); + } else { + func_defs = dest_file_hunks.added_hunks(); + } + for dest_func in func_defs { + if let Some(dest_func_name) = dest_func.function_name() { + if let Some(dest_funcdef_line) = dest_func.line_number() { + if let Some(possible_filepaths) = + function_calls_search(review, dest_func_name, &dest_lang) + { + if possible_filepaths.is_empty() { + log::debug!("[process_func_defs] No files detected having function call"); + 
continue; + } + for possible_filepath in possible_filepaths { + if possible_filepath == *dest_filename { + continue; + } + let lang_opt = detect_language(&possible_filepath); + if lang_opt.is_none() { + log::debug!("[process_func_defs] Unable to determine language: {}", &possible_filepath); + continue; + } + let lang = lang_opt.expect("Empty lang_opt"); + if lang != dest_lang { + log::debug!("[process_func_defs] Different languages: {}, {}", &lang, &dest_lang); + continue; + } + let possible_path = Path::new(&possible_filepath); + let possible_pathbuf = possible_path.to_path_buf(); + // get func call + if let Some(func_calls) = func_call_identifier.functions_in_file(&possible_pathbuf, &lang).await { + // get func def + for func_call in func_calls.function_calls() { + if let Some(src_func_def) = get_function_def_for_func_call( + &possible_pathbuf, func_call.line_number().to_owned() as usize + ).await { + if let Some(source_filename) = absolute_to_relative_path(&possible_filepath, review) { + // add edge + let mut dest_file_rel = dest_filename.to_string(); + if let Some(dest_file_relative_path) = absolute_to_relative_path(&dest_filename, review) { + dest_file_rel = dest_file_relative_path; + } + graph_elems.add_edge(edge_color, + func_call.line_number().to_owned() as usize, + src_func_def.name(), + dest_func_name, + &source_filename, + &dest_file_rel, + "", + edge_color, + src_func_def.line_start(), + dest_funcdef_line); + } + } + } + } + } + } + } + } + } +} +} \ No newline at end of file diff --git a/vibi-dpu/src/graph/graph_info.rs b/vibi-dpu/src/graph/graph_info.rs new file mode 100644 index 0000000..a50520d --- /dev/null +++ b/vibi-dpu/src/graph/graph_info.rs @@ -0,0 +1,348 @@ +use std::{collections::HashMap, path::PathBuf}; +use crate::{graph::function_line_range::generate_function_map, utils::{gitops::{git_checkout_commit, StatItem}, review::Review}}; +use super::{function_call::{FunctionCallChunk, FunctionCallIdentifier, FunctionCallsOutput}, 
function_line_range::{AllFileFunctions, HunkFuncDef}, function_name::FunctionNameIdentifier, gitops::{get_changed_hunk_lines, HunkDiffMap}, utils::{detect_language, numbered_content, read_file, source_diff_files}}; + +#[derive(Debug, Default, Clone)] +pub struct DiffFuncDefs { + added_func_defs: Vec, + deleted_func_defs: Vec +} + +impl DiffFuncDefs { + pub fn extend_added_funcdefs(&mut self, add_funcdefs: Vec) { + self.added_func_defs.extend(add_funcdefs); + } + + pub fn extend_deleted_funcdefs(&mut self, del_funcdefs: Vec) { + self.deleted_func_defs.extend(del_funcdefs); + } + + pub fn added_func_defs(&self) -> &Vec { + &self.added_func_defs + } + + pub fn deleted_func_defs(&self) -> &Vec { + &self.deleted_func_defs + } +} +#[derive(Debug, Default, Clone)] +pub struct FuncCall { + // import_info: ImportPath, + call_info: Vec +} + +impl FuncCall { + // pub fn import_info(&self) -> &ImportPath { + // &self.import_info + // } + pub fn call_info(&self) -> &Vec { + &self.call_info + } + + // pub fn func_call_hunk_lines(&self, hunk_diff: &HunkDiffLines) -> Option { + // let mut hunk_func_calls_lines = Vec::::new(); + // for func_call in self.call_info() { + // for call_line in func_call.function_calls() { + // if hunk_diff.start_line() <= call_line && hunk_diff.end_line() >= call_line { + // hunk_func_calls_lines.push(call_line.to_owned()); + // } + // } + // } + // if hunk_func_calls_lines.is_empty() { + // return None; + // } + // let hunk_func_call = FuncCall{ + // import_info: self.import_info.clone(), + // call_info: vec![FunctionCallChunk::new(hunk_func_calls_lines, + // self.import_info().imported().to_string())]}; + // return Some(hunk_func_call); + // } + + // pub fn function_name(&self) -> &String { + // self.import_info.imported() + // } +} + +#[derive(Debug, Default, Clone)] +pub struct DiffFuncCall { + added_calls: FunctionCallsOutput, + deleted_calls: FunctionCallsOutput +} + +impl DiffFuncCall { + // pub fn add_added_calls(&mut self, add_calls: FuncCall) 
{ + // self.added_calls.push(add_calls); + // } + + // pub fn add_deleted_calls(&mut self, del_calls: FuncCall) { + // self.deleted_calls.push(del_calls); + // } + + pub fn added_calls(&self) -> &FunctionCallsOutput { + &self.added_calls + } + + pub fn deleted_calls(&self) -> &FunctionCallsOutput { + &self.deleted_calls + } +} + +#[derive(Debug, Default, Clone)] +pub struct DiffGraph { + hunk_diff_map: HunkDiffMap +} + +impl DiffGraph { + pub fn hunk_diff_map(&self) -> &HunkDiffMap { + &self.hunk_diff_map + } + // pub fn add_func_def(&mut self, filename: String, diff_func_defs: DiffFuncDefs) { + // self.diff_func_defs.insert(filename, diff_func_defs); + // } + + // pub fn add_diff_func_calls(&mut self, filename: String, diff_func_calls: DiffFuncCall) { + // self.diff_func_calls.insert(filename, diff_func_calls); + // } + + // pub fn all_file_func_defs(&self) -> &AllFileFunctions { + // &self.diff_files_func_defs + // } + + // // pub fn all_file_imports(&self) -> &FilesImportInfo { + // // &self.diff_files_imports + // // } + + // pub fn diff_func_defs(&self) -> &HashMap { + // &self.diff_func_defs + // } + + // pub fn diff_func_calls(&self) -> &HashMap { + // &self.diff_func_calls + // } + + // pub fn func_calls_for_func(&self, function_name: &str, filename: &str) -> Option<&FuncCall> { + // if let Some(func_call_map) = self.diff_files_func_calls.get(filename) { + // if let Some(func_call) = func_call_map.get(function_name) { + // return Some(func_call) + // } + // } + // return None; + // } +} + +pub async fn generate_diff_graph(diff_files: &Vec, review: &Review) -> Option { + let diff_code_files_opt = source_diff_files(diff_files); + if diff_code_files_opt.is_none() { + log::debug!("[generate_diff_graph] No relevant source diff files in: {:#?}", diff_files); + return None; + } + let diff_code_files = diff_code_files_opt.expect("Empty diff_code_files_opt"); + let mut hunk_diff_map = get_changed_hunk_lines(&diff_code_files, review); + // get func defs for base 
commit for files in diff + log::debug!("[generate_diff_graph] hunk diff map =======~~~~~~~~ {:#?}", &hunk_diff_map); + let diff_graph_opt = process_hunk_diff(&mut hunk_diff_map, review).await; + return diff_graph_opt; +} + +async fn process_hunk_diff(hunk_diff_map: &mut HunkDiffMap, review: &Review) -> Option { + // full graph func def and import info for diff selected files is required. + let func_name_identifier_opt = FunctionNameIdentifier::new(); + if func_name_identifier_opt.is_none() { + log::error!("[process_hunk_diff] Unable to initialize function name identifier"); + return None; + } + let mut func_name_identifier = func_name_identifier_opt.expect("Empty func_name_identifier_opt"); + git_checkout_commit(review, review.pr_head_commit()); + set_func_def_info(hunk_diff_map, &mut func_name_identifier, true).await; + git_checkout_commit(review, review.base_head_commit()); + set_func_def_info(hunk_diff_map, &mut func_name_identifier, false).await; + let diff_graph = DiffGraph { + hunk_diff_map: hunk_diff_map.to_owned() + }; + return Some(diff_graph); + // let all_diff_files = hunk_diff_map.all_files_pathbuf(review.clone_dir()); + // // do generate function defs , only starting line + // let base_commit_func_defs_opt = generate_function_map(&all_diff_files).await; + // if base_commit_func_defs_opt.is_none() { + // log::debug!("[process_hunk_diff] Unable to generate func defs for base commit"); + // return None; + // } + // let base_commit_func_defs = base_commit_func_defs_opt.expect("Empty let base_commit_func_defs_opt"); + // let base_func_calls_opt = diff_file_func_calls(&all_diff_files, hunk_diff_map, false).await; + // if base_func_calls_opt.is_none() { + // log::debug!("[process_hunk_diff] Unable to calculate diff_file_func_calls"); + // return None; + // } + // let base_func_calls = base_func_calls_opt.expect("Empty base_func_calls_opt"); + // git_checkout_commit(review, &review.pr_head_commit()); + // let diff_func_defs_opt = 
generate_function_map(&all_diff_files).await; + // // let diff_imports_opt = get_import_lines(&all_diff_files).await; + // // TODO FIXME - opt logic + // if diff_func_defs_opt.is_none() { + // log::debug!("[process_hunk_diff] Unable to generate func definitions diff map"); + // return None; + // } + // let diff_files_func_defs = diff_func_defs_opt.expect("Empty all_file_func_defs_opt)"); + // let diff_files_func_calls_opt = diff_file_func_calls(&all_diff_files, hunk_diff_map, true).await; + // if diff_files_func_calls_opt.is_none() { + // log::debug!("[process_hunk_diff] Unable to calculate diff_file_func_calls"); + // return None; + // } + // let diff_files_func_calls = diff_files_func_calls_opt.expect("Empty diff_files_func_calls_opt"); + + // for filepath in &all_diff_files { + // let filename = filepath.to_str().expect("Unable to deserialize pathbuf"); + // let mut diff_func_defs = DiffFuncDefs { + // added_func_defs: Vec::new(), deleted_func_defs: Vec::new()}; + // // define base and diff func calls output for this filename + // if let Some(base_func_call) = base_func_calls.get(filename) { + // if let Some(diff_func_call) = diff_files_func_calls.get(filename) { + // // initialize and add DiffFuncCall to diff_func_calls_map + // let func_calls = DiffFuncCall { + // added_calls: diff_func_call.to_owned(), deleted_calls: base_func_call.to_owned()}; + // diff_graph.add_diff_func_calls(filename.to_string(), func_calls); + // } + // }; + // if let Some(file_line_map) = hunk_diff_map.file_hunks(filename) { + // for hunk_diff in file_line_map.added_hunks() { + // if let Some(funcs_map) = diff_graph.all_file_func_defs().functions_in_file(filename) { + // // find func_defs for files in hunks + // let funcs_def_vec = funcs_map.funcs_in_hunk(hunk_diff); + // if !funcs_def_vec.is_empty() { + // // add func def vec to something with file as key + // diff_func_defs.extend_added_funcdefs(funcs_def_vec); + // } + // } + // } + // for hunk_diff in file_line_map.deleted_hunks() 
{ + // if let Some(funcs_map) = base_commit_func_defs.functions_in_file(filename) { + // // find func_defs for files in hunks + // let funcs_def_vec = funcs_map.funcs_in_hunk(hunk_diff); + // if !funcs_def_vec.is_empty() { + // // add func def vec to something with file as key + // diff_func_defs.extend_deleted_funcdefs(funcs_def_vec); + // } + // } + // } + // TODO FIXME - why no deleted func calls, and how is only diff part sent to find func calls? + // find func call in hunks for each import + // want to record not all func_calls but hunk specific line numbers + // might need to reorder for loops to make sure repeated calcs are avoided + // if let Some(imports_info) = diff_graph.all_file_imports().file_import_info(filename) { + // for import_info in imports_info.all_import_paths() { + // if let Some(func_call) = diff_graph.func_calls_for_func(import_info.imported(), filename) { + // diff_func_calls_add.add_added_calls(func_call.to_owned()); + // } + // // todo fixme - finding all func calls in file needs a different approach to add added and deleted calls + // // TODO FIXME - need function call calc for all diff files, need to search for funcdefs as well as imports + // // if let Some(func_calls) = function_calls_in_file(&filepath, import_info.imported()).await { + // // // func_calls is basically all func calls of a function in the latest commit of the file + // // if let Some(file_line_map) = hunk_diff_map.file_hunks(filename) { + // // let func_call = FuncCall{ import_info, call_info: func_calls }; + // // for hunk_diff in file_line_map.added_hunks() { + // // if let Some(hunk_func_call) = func_call.func_call_hunk_lines(&hunk_diff) { + // // diff_func_calls_add.add_added_calls(hunk_func_call); + // // } + // // } + // // } + // // } + // } + // } + // // Use full graph's import info + // do a git checkout to base commit + // do the same thing as done for added_calls + // } + // diff_graph.add_func_def(filename.to_string(), diff_func_defs); + // 
diff_func_calls_map.insert(filename.to_string(), diff_func_calls_add); + } + // git_checkout_commit(review, &review.base_head_commit()); + // for filepath in &all_diff_files { + // let filename = filepath.to_str().expect("Unable to deserialize pathbuf"); + // let diff_func_call = diff_func_calls_map.entry(filename.to_string()).or_insert(DiffFuncCall { added_calls: Vec::new(), deleted_calls: Vec::new() }); + + // // if let Some(imports_info) = base_commit_import_info.file_import_info(filename) { + // // for import_info in imports_info.all_import_paths() { + // // // todo fixme - finding all func calls in file needs a different approach to add added and deleted calls + // // if let Some(func_calls) = function_calls_in_file(&filepath, import_info.imported()).await { + // // // func_calls is basically all func calls of a function in the latest commit of the file + // // if let Some(file_line_map) = hunk_diff_map.file_hunks(filename) { + // // let func_call = FuncCall{ import_info, call_info: func_calls }; + // // for hunk_diff in file_line_map.deleted_hunks() { + // // if let Some(hunk_func_call) = func_call.func_call_hunk_lines(&hunk_diff) { + // // diff_func_call_del.add_deleted_calls(hunk_func_call); + // // } + // // } + // // } + // // } + // // } + // // } + // } + // // for (filename, diff_func_call) in diff_func_calls_map.iter() { + // diff_graph.add_diff_func_calls(filename.to_owned(), diff_func_call.to_owned()); + // } +// return Some(diff_graph); +// } + +// async fn diff_file_func_calls(all_diff_files: &Vec, hunk_diff_map: &HunkDiffMap, added: bool) -> Option>> { +// // func calls made in diff hunks for all diff files +// let mut func_call_file_map = HashMap::new(); +// let func_call_identifier_opt = FunctionCallIdentifier::new(); +// if func_call_identifier_opt.is_none() { +// log::error!("[diff_file_func_calls] Unable to create FunctionCallIdentifier"); +// return None; +// } +// let mut func_call_identifier = func_call_identifier_opt.expect("Empty 
func_call_identifier_opt"); +// for filepathbuf in all_diff_files { +// let filepath = filepathbuf.to_str().expect("Unable to deserialize pathbuf"); +// let hunk_diffs_opt = hunk_diff_map.file_hunks(filepath); +// if hunk_diffs_opt.is_none() { +// log::debug!("[diff_file_func_calls] No entry in hunk_diff_map for {}", filepath); +// continue; +// } +// let hunk_diffs = hunk_diffs_opt.expect("Empty hunk_diffs_opt"); +// let file_hunks; +// if added { +// file_hunks = hunk_diffs.added_hunks(); +// } else { +// file_hunks = hunk_diffs.deleted_hunks(); +// } +// let func_calls_opt = func_call_identifier.function_calls_in_hunks(filepathbuf, "rust", file_hunks).await; +// if func_calls_opt.is_none() { +// log::debug!("[diff_file_func_calls] No function calls in hunks: {}, {:?}", filepath, hunk_diffs); +// continue; +// } +// let func_calls = func_calls_opt.expect("Empty func_calls_opt"); +// func_call_file_map.insert(filepath.to_string(), func_calls); +// } +// return Some(func_call_file_map); +// } + +async fn set_func_def_info(hunk_diff_map: &mut HunkDiffMap, func_name_identifier: &mut FunctionNameIdentifier, added: bool) { + for (filepath, file_func_diff) in hunk_diff_map.file_line_map_mut() { + let file_hunks; + if added { + file_hunks = file_func_diff.added_hunks_mut(); + } else { + file_hunks = file_func_diff.deleted_hunks_mut(); + } + for file_hunk in file_hunks { + if let Some(func_line_raw) = file_hunk.function_line().clone() { + // get line number + if let Some(file_contents) = read_file(filepath) { + let line_number_opt = file_contents + .lines() // Split into lines + .enumerate() // Get (index, line) + .position(|(_, line)| line.contains(&func_line_raw)) // Find the position where the line matches + .map(|index| index + 1); // Convert 0-based index to 1-based line number + + file_hunk.set_line_number(line_number_opt); + if let Some(lang) = detect_language(filepath) { + if let Some(func_name) = func_name_identifier.function_name_in_line(&func_line_raw, 
&lang).await { + file_hunk.set_function_name(func_name.get_function_name().to_string()); + } + } + } + } + } + } +} \ No newline at end of file diff --git a/vibi-dpu/src/graph/mermaid_elements.rs b/vibi-dpu/src/graph/mermaid_elements.rs new file mode 100644 index 0000000..cd287b0 --- /dev/null +++ b/vibi-dpu/src/graph/mermaid_elements.rs @@ -0,0 +1,65 @@ + +use crate::{graph::{elements::MermaidGraphElements, graph_edges::graph_edges, graph_info::generate_diff_graph}, utils::{gitops::{git_checkout_commit, StatItem}, review::Review}}; + +use super::{function_call::FunctionCallChunk, function_line_range::{AllFileFunctions, FuncDefInfo, FunctionFileMap}, graph_info::{DiffFuncCall, DiffFuncDefs, DiffGraph, FuncCall}, utils::all_code_files}; + + +pub async fn generate_mermaid_flowchart(diff_files: &Vec, review: &Review) -> Option { + let flowchart_content_res = generate_flowchart_elements(diff_files, review).await; + if flowchart_content_res.is_none() { + log::error!("[generate_mermaid_flowchart] Unable to generate flowchart content, review: {}", review.id()); + return None; + } + let flowchart_content = flowchart_content_res.expect("Empty flowchart_content_res"); + let flowchart_str = format!( + "%%{{init: {{ \ + 'theme': 'neutral', \ + 'themeVariables': {{ \ + 'fontSize': '20px' \ + }}, \ + 'flowchart': {{ \ + 'nodeSpacing': 100, \ + 'rankSpacing': 100 \ + }} \ + }} }}%%\n \ + \tflowchart LR\n{}", + &flowchart_content + ); + return Some(flowchart_str); +} + +async fn generate_flowchart_elements(diff_files: &Vec, review: &Review) -> Option { + // generate full graph for base commit id + git_checkout_commit(review, review.base_head_commit()); + let base_filepaths_opt = all_code_files(review.clone_dir(), diff_files); + if base_filepaths_opt.is_none() { + log::error!( + "[generate_flowchart_elements] Unable to get file paths: {}", review.clone_dir()); + return None; + } + let base_filepaths = base_filepaths_opt.expect("Empty base_filepaths_opt"); + // let 
base_commit_import_info = get_test_import_info(); + let diff_graph_opt = generate_diff_graph(diff_files, review).await; + log::debug!("[generate_flowchart_elements] diff_graph_opt = {:#?}", &diff_graph_opt); + if diff_graph_opt.is_none() { + log::error!( + "[generate_flowchart_elements] Unable to generate diff graph for review: {}", + review.id()); + return None; + } + let diff_graph = diff_graph_opt.expect("Empty diff_graph_opt"); + // let diff_graph = get_test_diff_graph(); + // let diff_info = generate_diff_info(&full_graph, &diff_graph); + // git_checkout_commit(review, review.pr_head_commit()); + // let head_filepaths_opt = all_code_files(review.clone_dir()); + // if head_filepaths_opt.is_none() { + // log::error!( + // "[generate_flowchart_elements] Unable to get file paths: {}", review.clone_dir()); + // return None; + // } + // let head_filepaths = head_filepaths_opt.expect("Empty head_filepaths_opt"); + let mut graph_elems = MermaidGraphElements::new(); + graph_edges(&base_filepaths, review, &diff_graph, &mut graph_elems).await; + let elems_str = graph_elems.render_elements(review); + return Some(elems_str); +} \ No newline at end of file diff --git a/vibi-dpu/src/graph/mod.rs b/vibi-dpu/src/graph/mod.rs new file mode 100644 index 0000000..849da12 --- /dev/null +++ b/vibi-dpu/src/graph/mod.rs @@ -0,0 +1,10 @@ +pub mod utils; +pub mod gitops; +pub mod mermaid_elements; +pub mod elements; +pub mod function_line_range; +pub mod file_imports; +pub mod graph_info; +pub mod graph_edges; +pub mod function_call; +pub mod function_name; \ No newline at end of file diff --git a/vibi-dpu/src/graph/utils.rs b/vibi-dpu/src/graph/utils.rs new file mode 100644 index 0000000..c558dd2 --- /dev/null +++ b/vibi-dpu/src/graph/utils.rs @@ -0,0 +1,290 @@ +use std::{collections::{HashMap, HashSet}, path::{Path, PathBuf}}; + +use serde::{Deserialize, Serialize}; +use serde_json::{json, Value}; +use walkdir::WalkDir; +use std::fs; +use rand::Rng; + + +use 
crate::utils::{gitops::StatItem, reqwest_client::get_client, review::Review}; + +#[derive(Debug, Serialize, Default, Deserialize, Clone)] +struct LlmResponse { + model: String, + created_at: String, + response: String, + done: bool +} + +pub async fn call_llm_api(prompt: String) -> Option { + let client = get_client(); + let url = "http://34.100.208.132/api/generate"; + log::debug!("[call_llm_api] Prompt = {:?}", &prompt); + let response_res = client.post(url) + .json(&json!({"model": "phind-codellama", "prompt": prompt})) + .send() + .await; + + if let Err(err) = response_res { + log::error!("[call_llm_api] Error in calling api: {:?}", err); + return None; + } + + let response = response_res.unwrap(); + let mut final_response = String::new(); + let resp_text_res = response.text().await; + if resp_text_res.is_err() { + let e = resp_text_res.expect_err("Empty error in resp_text_res"); + log::error!("[call_llm_api] Error while deserializing response to text: {:?}", e); + return None; + } + let resp_text = resp_text_res.expect("Uncaught error in resp_text"); + // Split the string by the sequence "}\n{" + let split_seq = "}\n{"; + let mut chunks = Vec::new(); + let mut start = 0; + while let Some(pos) = &resp_text[start..].find(split_seq) { + let end = start + pos + 1; + chunks.push(&resp_text[start..end]); + start = end + 1; + } + + // Process each chunk + for chunk in chunks { + // Attempt to fix incomplete chunks + let fixed_chunk = if !chunk.starts_with("{") { + format!("{{{}", chunk) + } else if !chunk.ends_with("}") { + format!("{}{}", chunk, "}") + } else { + chunk.to_string() + }; + let parsed_chunk_res = serde_json::from_str::(&fixed_chunk); + if parsed_chunk_res.is_err() { + let e = parsed_chunk_res.expect_err("Empty error in parsed_chunk_res"); + log::error!("[call_llm_api] Unable to deserialize {}: {:?}", chunk, e); + continue; + } + let parsed_chunk = parsed_chunk_res.expect("Uncaught error in parsed_chunk_res"); + if let Some(parsed_response) = 
parsed_chunk.get("response").and_then(|v| v.as_str()){ + final_response.push_str(parsed_response); + } + if let Some(done_field) = parsed_chunk.get("done").and_then(|v| v.as_bool()) { + if done_field { + break; + } + } + } + let final_response_trimmed = final_response.trim(); + if final_response_trimmed.starts_with("{") && !final_response_trimmed.ends_with("}") { + final_response.push_str("}"); + } + log::debug!("[call_llm_api] final_response = {:?}", &final_response); + Some(final_response) +} + +pub fn read_file(file: &str) -> Option { + log::debug!("[read_file] file name = {}", &file); + let path = Path::new(file); + let content_res = fs::read_to_string(path); + if !path.exists() { + log::error!("[read_file] File does not exist: {:?}", &path); + return None; + } + if content_res.is_err() { + let err = content_res.expect_err("Empty error in content_res"); + log::error!("[read_file] Error in reading content: {:?}", err); + return None; + } + let content = content_res.expect("Empty content_res"); + Some(content) +} + +pub fn generate_random_string(length: usize) -> String { + const CHARSET: &[u8] = b"abcdefghijklmnopqrstuvwxyz"; + let mut rng = rand::thread_rng(); + let random_string: String = (0..length) + .map(|_| { + let idx = rng.gen_range(0..CHARSET.len()); + CHARSET[idx] as char + }) + .collect(); + random_string +} + +pub fn all_code_files(dir: &str, diff_files: &Vec) -> Option> { + let mut code_files = Vec::::new(); + let all_diff_langs = detect_langs_diff(diff_files); + if all_diff_langs.is_empty() { + log::error!("[all_code_files] No known language files detected in diff"); + return None; + } + for entry in WalkDir::new(dir).into_iter().filter_map(|e| e.ok()) { + let path = entry.path().to_owned(); + log::debug!("[all_code_files] path = {:?}", path); + let ext = path.extension().and_then(|ext| ext.to_str()); + log::debug!("[all_code_files] extension = {:?}", &ext); + if let Some(file_lang) = detect_language(&path.to_string_lossy()) { + if 
all_diff_langs.contains(&file_lang) { + match path.canonicalize() { + Ok(abs_path) => code_files.push(abs_path), + Err(e) => log::error!("Failed to get absolute path for {:?}: {:?}", path, e), + } + } + } + } + if code_files.is_empty() { + return None; + } + return Some(code_files); +} + +fn detect_langs_diff(diff_files: &Vec) -> HashSet { + let mut all_diff_langs: HashSet = HashSet::new(); + for diff_file in diff_files { + if let Some(diff_lang) = detect_language(&diff_file.filepath) { + all_diff_langs.insert(diff_lang); + } + } + return all_diff_langs; +} + +pub fn match_imported_filename_to_path(paths: &Vec, filename: &str) -> Option { + let relative_path = Path::new(filename); + // Find the first path that matches the filename or relative path + for path in paths { + if path.ends_with(relative_path) { + return Some(path.clone()); // Return the first matching path + } + } + // Return an empty PathBuf or handle the case where no match is found + None +} + +pub fn source_diff_files(diff_files: &Vec) -> Option> { + let mut code_files = Vec::::new(); + for stat_item in diff_files { + let filepath_str = &stat_item.filepath; + let filepath = Path::new(filepath_str); + if let Some(lang) = detect_language(&filepath_str) { + code_files.push(stat_item.clone()); + } + } + if code_files.is_empty() { + return None; + } + return Some(code_files); +} + +pub fn numbered_content(file_contents: String) -> Vec { + let lines = file_contents + .lines() + .enumerate() + .map(|(index, line)| format!("{} {}", index, line)) + .collect::>(); + return lines; +} + +pub fn absolute_to_relative_path(abs_path: &str, review: &Review) -> Option { + let base_path = review.clone_dir(); + let full_path = PathBuf::from(abs_path); + let rel_path_res = full_path.strip_prefix(base_path); + log::debug!("[absolute_to_relative_path] rel_path = {:#?}", &rel_path_res); + log::debug!("[absolute_to_relative_path] full_path = {:?}, base_path = {:?}", &full_path, base_path); + if let Err(e) = rel_path_res { + 
log::error!("[absolute_to_relative_path] Error in removing prefix: {:?}", e); + return None; + } + let rel_path = rel_path_res.expect("Uncaught error in rel_path_res"); + return Some(rel_path.to_str().expect("Unable to deserialze rel_path").to_string()); +} + +// Generate a map of file extensions to languages or frameworks +fn get_extension_map() -> HashMap<&'static str, &'static str> { + let mut extension_map = HashMap::new(); + + // Common programming languages + extension_map.insert("rs", "Rust"); + extension_map.insert("py", "Python"); + extension_map.insert("js", "JavaScript"); + extension_map.insert("ts", "TypeScript"); + extension_map.insert("java", "Java"); + extension_map.insert("rb", "Ruby"); + extension_map.insert("go", "Go"); + extension_map.insert("cpp", "C++"); + extension_map.insert("cs", "C#"); + extension_map.insert("c", "C"); + extension_map.insert("php", "PHP"); + extension_map.insert("swift", "Swift"); + extension_map.insert("kt", "Kotlin"); + extension_map.insert("m", "Objective-C"); + extension_map.insert("pl", "Perl"); + extension_map.insert("r", "R"); + extension_map.insert("scala", "Scala"); + extension_map.insert("dart", "Dart"); + extension_map.insert("lua", "Lua"); + extension_map.insert("hs", "Haskell"); + extension_map.insert("erl", "Erlang"); + extension_map.insert("ml", "OCaml"); + extension_map.insert("groovy", "Groovy"); + extension_map.insert("sql", "SQL"); + extension_map.insert("v", "V"); + extension_map.insert("nim", "Nim"); + extension_map.insert("elm", "Elm"); + extension_map.insert("jl", "Julia"); + extension_map.insert("cr", "Crystal"); + extension_map.insert("ex", "Elixir"); + extension_map.insert("fs", "F#"); + extension_map.insert("clj", "Clojure"); + extension_map.insert("coffee", "CoffeeScript"); + extension_map.insert("hx", "Haxe"); + extension_map.insert("lisp", "Lisp"); + extension_map.insert("scss", "Sass"); + extension_map.insert("ps1", "PowerShell"); + extension_map.insert("vb", "Visual Basic"); + 
extension_map.insert("bat", "Batch Script"); + extension_map.insert("matlab", "MATLAB"); + extension_map.insert("vbs", "VBScript"); + extension_map.insert("as", "ActionScript"); + extension_map.insert("rkt", "Racket"); + extension_map.insert("cls", "Apex"); + extension_map.insert("sass", "Sass"); + extension_map.insert("less", "Less"); + + // Web and markup languages + extension_map.insert("html", "HTML"); + extension_map.insert("css", "CSS"); + extension_map.insert("xml", "XML"); + extension_map.insert("md", "Markdown"); + extension_map.insert("adoc", "AsciiDoc"); + extension_map.insert("rst", "reStructuredText"); + + // Frameworks and template languages + extension_map.insert("jsx", "React JSX"); + extension_map.insert("tsx", "React TypeScript TSX"); + extension_map.insert("vue", "Vue.js"); + extension_map.insert("erb", "Ruby on Rails Embedded Ruby"); + extension_map.insert("ejs", "Express.js Embedded JavaScript"); + + // Config and data formats + // extension_map.insert("json", "JSON"); + // extension_map.insert("yaml", "YAML"); + // extension_map.insert("toml", "TOML"); + // extension_map.insert("ini", "INI Config"); + // extension_map.insert("config", "Configuration File"); + + extension_map +} + +// Detect the programming language or framework based on the file extension +pub fn detect_language(file_path: &str) -> Option { + let extension_map = get_extension_map(); + let path = Path::new(file_path); + + // Extract the file extension and match it with the map + path.extension() + .and_then(|ext| ext.to_str()) + .map(|ext| ext.to_lowercase()) // Normalize to lowercase + .and_then(|ext| extension_map.get(ext.as_str()).map(|&lang| lang.to_string())) +} \ No newline at end of file diff --git a/vibi-dpu/src/main.rs b/vibi-dpu/src/main.rs index 14b88d6..6649b54 100644 --- a/vibi-dpu/src/main.rs +++ b/vibi-dpu/src/main.rs @@ -7,6 +7,7 @@ mod github; mod utils; mod logger; mod health; +mod graph; use github::auth::app_access_token; use 
health::status::send_status_start;
use tokio::task;
diff --git a/vibi-dpu/src/utils/gitops.rs b/vibi-dpu/src/utils/gitops.rs
index d6499c4..4fc05f5 100644
--- a/vibi-dpu/src/utils/gitops.rs
+++ b/vibi-dpu/src/utils/gitops.rs
@@ -7,6 +7,7 @@ use sha256::digest;
 use rand::distributions::Alphanumeric;
 use rand::{thread_rng, Rng};
 use tokio::fs;
+use tokio::task;
 use std::io::ErrorKind;
 
 use super::hunk::BlameItem;
@@ -15,9 +16,9 @@ use super::lineitem::LineItem;
 use crate::db::repo::save_repo_to_db;
 use crate::utils::repo::Repository;
 
-#[derive(Debug, Serialize, Default, Deserialize)]
+#[derive(Debug, Serialize, Default, Deserialize, Clone)]
 pub struct StatItem {
-    filepath: String,
+    pub filepath: String,
     additions: i32,
     deletions: i32,
 }
@@ -89,6 +90,29 @@ pub async fn git_pull(review: &Review, access_token: &str) {
     };
 }
 
+// Checks out the given commit in the review's clone directory.
+// Best-effort: failures (including a non-zero git exit status) are logged, not returned.
+pub fn git_checkout_commit(review: &Review, commit_id: &str) {
+    let directory = review.clone_dir();
+    let output_res = Command::new("git")
+        .arg("checkout")
+        .arg(commit_id)
+        .current_dir(directory)
+        .output();
+    if output_res.is_err() {
+        let e = output_res.expect_err("No error in output_res");
+        log::error!("[git_checkout_commit] failed to execute git checkout: {:?}", e);
+        return;
+    }
+    let output = output_res.expect("Uncaught error in output_res");
+    match str::from_utf8(&output.stderr) {
+        Ok(v) => log::debug!("[git_checkout_commit] git checkout stderr = {:?}", v),
+        Err(e) => {/* error handling */ log::error!("[git_checkout_commit] git checkout stderr error {}", e)},
+    };
+    match str::from_utf8(&output.stdout) {
+        Ok(v) => log::debug!("[git_checkout_commit] git checkout stdout = {:?}", v),
+        Err(e) => {/* error handling */ log::error!("[git_checkout_commit] git checkout stdout error {}", e)},
+    };
+}
+
 fn set_git_url(git_url: &str, directory: &str, access_token: &str, repo_provider: &str) {
     let clone_url_opt = create_clone_url(git_url, access_token, repo_provider);
     if clone_url_opt.is_none(){
@@ -227,6 +251,7 @@ pub fn generate_diff(review: &Review, smallfiles: &Vec) -> HashMap &Option> { &self.handles } -} +} \
No newline at end of file
diff --git a/vibi-dpu/src/utils/repo_config.rs b/vibi-dpu/src/utils/repo_config.rs
index 3f74e42..432a8fd 100644
--- a/vibi-dpu/src/utils/repo_config.rs
+++ b/vibi-dpu/src/utils/repo_config.rs
@@ -3,7 +3,10 @@ use serde::{Serialize, Deserialize};
 #[derive(Debug, Serialize, Deserialize, Clone)]
 pub struct RepoConfig {
     comment: bool,
-    auto_assign: bool
+    auto_assign: bool,
+    // Absent in configs stored before this field existed; default to false
+    #[serde(default)]
+    diff_graph: bool
 }
 
 impl RepoConfig {
@@ -16,11 +17,16 @@ impl RepoConfig {
         self.auto_assign
     }
 
+    pub fn diff_graph(&self) -> bool {
+        self.diff_graph
+    }
+
     // Function to create a default RepoConfig
     pub fn default() -> Self {
         RepoConfig {
             comment: true,
-            auto_assign: true
+            auto_assign: true,
+            diff_graph: false
         }
     }
 }
\ No newline at end of file