Skip to content

Commit

Permalink
Feature: Custom git diff patch parser
Browse files Browse the repository at this point in the history
Changes:
- Custom made git diff patch parser
  • Loading branch information
DennisJensen95 committed Jul 10, 2023
1 parent 9d101d6 commit 34d65a9
Show file tree
Hide file tree
Showing 6 changed files with 197 additions and 57 deletions.
44 changes: 0 additions & 44 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,5 @@ chrono = "0.4.24"
clap = { version = "4.2.7", features = ["derive"] }
lcov2cobertura = "1.0.1"
mockall = "0.11.4"
patch = { git = "https://github.com/DennisJensen95/git-patch-parser" }
serde = { version = "1.0.130", features = ["derive"]}
serde-xml-rs = "0.6.0"
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM dennisjensen95/coverage-scope:v0.3.3
FROM dennisjensen95/coverage-scope:v0.4.0

COPY entrypoint.sh /entrypoint.sh

Expand Down
15 changes: 4 additions & 11 deletions src/git_diff_parser.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
use patch::Patch;

use crate::coberta_xml_parser::Coverage;
use crate::git_patch_parser::parse_patch_string;

pub struct DiffFiles {
pub files: Vec<(String, Vec<usize>)>,
Expand All @@ -10,18 +9,15 @@ pub struct DiffFiles {
impl DiffFiles {
pub fn new(diff_file_string: &str, file_extensions: Vec<String>) -> Self {
// Parse the diff file and return file paths and line numbers changed
let patches = match Patch::from_multiple(diff_file_string) {
Ok(p) => p,
Err(e) => panic!("Error parsing diff file: {e}"),
};
let patches = parse_patch_string(diff_file_string);

let mut files_changed = DiffFiles {
files: Vec::new(),
file_extensions: file_extensions.clone(),
};

for patch in patches {
let file_path = patch.new.path;
let file_path = patch.file_path;

// Only code files
if !file_extensions.iter().any(|ext| file_path.ends_with(ext)) {
Expand All @@ -31,15 +27,12 @@ impl DiffFiles {
let mut lines_changed: Vec<usize> = Vec::new();

for hunk in patch.hunks {
let line_range = hunk.new_range;
let line_range = hunk.new;
for line_number in line_range.start..line_range.start + line_range.count {
lines_changed.push(line_number as usize);
}
}

// Remove b/ from the start of the file path
let file_path = file_path[2..].to_string();

files_changed.files.push((file_path, lines_changed));
}

Expand Down
191 changes: 191 additions & 0 deletions src/git_patch_parser.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
/// A span of lines as written in one half of a hunk header, for example the
/// `+6,7` in `@@ -6,6 +6,7 @@`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Range {
    /// Line number given in the header for this span.
    pub start: u32,
    /// Number of lines given in the header for this span.
    pub count: u32,
}

/// A single hunk of a patch, described from the new file's point of view.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Hunk {
    /// The new-file range taken from the hunk header.
    pub new: Range,
    /// Number of added (`+`) lines counted in the hunk body; added lines that
    /// contain only whitespace are not counted.
    pub new_lines_count: u32,
}

/// Everything parsed from one `diff --git` section of a patch.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Patch {
    /// Path of the changed file, taken from the `diff --git` header line.
    pub file_path: String,
    /// Hunks of this file, in the order they appear in the patch.
    pub hunks: Vec<Hunk>,
    /// Line numbers recorded for the added, non-blank lines of all hunks.
    pub changed_lines: Vec<u32>,
}

/// Parses the text of a `git diff` into one [`Patch`] per `diff --git` section.
///
/// For every section the file path is read from the `diff --git` header, every
/// `@@` line starts a new [`Hunk`], and every added (`+`) line that is not
/// blank is recorded in [`Patch::changed_lines`] under its line number in the
/// new file (the first line of a hunk has the number given as the hunk's start).
///
/// Lines that appear before the first `diff --git` header are ignored, so input
/// without such headers yields an empty vector.
///
/// # Panics
///
/// Panics if a hunk header cannot be parsed by `parse_hunk_header`.
pub fn parse_patch_string(patch_string: &str) -> Vec<Patch> {
    let mut patches: Vec<Patch> = Vec::new();
    let mut current_patch: Option<Patch> = None;
    let mut current_hunk: Option<Hunk> = None;
    // Line number, in the new file, of the next context or added line.
    let mut next_new_line: u32 = 0;

    for line in patch_string.lines() {
        if line.starts_with("diff --git") {
            // A new file section starts: flush the previous one. A hunk that
            // was never preceded by a file header is dropped here instead of
            // being attributed to the wrong file.
            let finished_hunk = current_hunk.take();
            if let Some(mut patch) = current_patch.take() {
                patch.hunks.extend(finished_hunk);
                patches.push(patch);
            }

            current_patch = Some(Patch {
                file_path: parse_new_file_path(line),
                hunks: Vec::new(),
                changed_lines: Vec::new(),
            });
        } else if line.starts_with("@@") {
            if let Some(patch) = current_patch.as_mut() {
                patch.hunks.extend(current_hunk.take());
            }

            let (line_start, hunk_line_count) = parse_hunk_header(line);
            // The first body line of the hunk is line `line_start` itself.
            next_new_line = line_start;
            current_hunk = Some(Hunk {
                new: Range {
                    start: line_start,
                    count: hunk_line_count,
                },
                new_lines_count: 0,
            });
        } else if line.starts_with('+') {
            // The `+++ b/<path>` header line also lands here, but no hunk is
            // active at that point, so nothing is recorded for it.
            let line_number = next_new_line;
            next_new_line += 1;

            // Blank additions occupy a line but are not reported as changes.
            if line.trim() == "+" {
                continue;
            }

            if let Some(hunk) = current_hunk.as_mut() {
                hunk.new_lines_count += 1;
                if let Some(patch) = current_patch.as_mut() {
                    patch.changed_lines.push(line_number);
                }
            }
        } else if line.starts_with('-') {
            // Removed lines do not exist in the new file.
            continue;
        } else if line.starts_with('\\') {
            // "\ No newline at end of file" is a marker, not a file line.
            continue;
        } else {
            // Context line: present in the new file but unchanged.
            next_new_line += 1;
        }
    }

    if let Some(mut patch) = current_patch {
        patch.hunks.extend(current_hunk);
        patches.push(patch);
    }

    patches
}

/// Extracts the new-file path from a `diff --git a/<old> b/<new>` header line.
fn parse_new_file_path(diff_header: &str) -> String {
    // Splitting at the last " b/" keeps paths that contain spaces intact.
    if let Some((_, path)) = diff_header.rsplit_once(" b/") {
        return path.to_string();
    }

    // No `b/` prefix (for example `git diff --no-prefix`): use the last token
    // unchanged instead of panicking.
    diff_header
        .rsplit(' ')
        .next()
        .unwrap_or(diff_header)
        .to_string()
}

/// Extracts the new-file range from a unified-diff hunk header.
///
/// For a header such as `@@ -6,6 +6,7 @@ something` this returns the
/// `(start, count)` pair of the `+start,count` token, here `(6, 7)`.
///
/// * When the count is omitted (`@@ -1 +1 @@`) it defaults to `1`, as the
///   unified diff format defines.
/// * When the header contains no `+` range at all, `(0, 0)` is returned.
///
/// # Panics
///
/// Panics if `hunk_header` contains no `@@` delimiter, or if the start or
/// count of the `+` range is not a valid `u32`.
fn parse_hunk_header(hunk_header: &str) -> (u32, u32) {
    // Text between the opening and closing `@@`, e.g. " -6,6 +6,7 ".
    let ranges = match hunk_header.split("@@").nth(1) {
        Some(ranges) => ranges,
        None => panic!("Error parsing line number range from hunk header: {hunk_header}"),
    };

    // Find the new-file range by its `+` marker rather than by position, so
    // headers with more than one `-` range (combined diffs) also work.
    let new_range = match ranges
        .split_whitespace()
        .find_map(|token| token.strip_prefix('+'))
    {
        Some(range) => range,
        None => return (0, 0),
    };

    let parse_number = |text: &str| -> u32 {
        text.parse().unwrap_or_else(|_| {
            panic!("Error parsing line number range from hunk header: {hunk_header}")
        })
    };

    match new_range.split_once(',') {
        Some((start, count)) => (parse_number(start), parse_number(count)),
        // "+N" is shorthand for "+N,1".
        None => (parse_number(new_range), 1),
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// A complete header yields the start line and line count of its `+` range.
    #[test]
    fn test_parse_hunk_header() {
        let parsed = parse_hunk_header("@@ -6,6 +6,7 @@ something");
        assert_eq!(parsed, (6, 7));
    }

    /// A header without any `+` range is reported as `(0, 0)`.
    #[test]
    fn test_parse_hunk_header_negative_count() {
        let parsed = parse_hunk_header("@@ -6,6 @@ something");
        assert_eq!(parsed, (0, 0));
    }

    /// Parses `str` and checks the number of patches, plus the hunks and
    /// changed lines summed over all of them.
    fn check_parse_diff_file(
        str: &str,
        patches_count: usize,
        hunk_count: usize,
        lines_changed: usize,
    ) {
        let parsed = parse_patch_string(str);
        assert_eq!(parsed.len(), patches_count);

        let mut total_hunks = 0;
        let mut total_changed_lines = 0;
        for patch in &parsed {
            total_hunks += patch.hunks.len();
            total_changed_lines += patch.changed_lines.len();
            // Echo every recorded line as `<path>:<line>`.
            for line in &patch.changed_lines {
                println!("{}:{}", patch.file_path, line);
            }
        }

        assert_eq!(total_hunks, hunk_count);
        assert_eq!(total_changed_lines, lines_changed);
    }

    /// Fixture `test_1.diff`: 3 patches, 5 hunks, 29 changed lines.
    #[test]
    fn test_parse_normal_patch_string() {
        let fixture =
            std::fs::read_to_string("assets/diff_files/test_1.diff").expect("Unable to read file");
        check_parse_diff_file(&fixture, 3, 5, 29);
    }

    /// Fixture `tricky.diff`: 9 patches, 23 hunks, 577 changed lines.
    #[test]
    fn test_parse_tricky_diff_patch() {
        let fixture =
            std::fs::read_to_string("assets/diff_files/tricky.diff").expect("Unable to read file");
        check_parse_diff_file(&fixture, 9, 23, 577);
    }
}
1 change: 1 addition & 0 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ mod coberta_xml_parser;
mod command_runner;
mod coverage_converter;
mod git_diff_parser;
mod git_patch_parser;

use coberta_xml_parser::Coverage;
use command_runner::{CommandRunner, CommandRunnerTrait};
Expand Down

0 comments on commit 34d65a9

Please sign in to comment.