1- use std:: ops:: Range ;
2-
31use crate :: histogram:: lcs:: find_lcs;
42use crate :: histogram:: list_pool:: { ListHandle , ListPool } ;
53use crate :: intern:: Token ;
6- use crate :: util:: { strip_common_postfix, strip_common_prefix} ;
7- use crate :: { myers, Sink } ;
4+ use crate :: myers;
85
96mod lcs;
107mod list_pool;
@@ -16,17 +13,15 @@ struct Histogram {
1613 pool : ListPool ,
1714}
1815
19- pub fn diff < S : Sink > (
20- mut before : & [ Token ] ,
21- mut after : & [ Token ] ,
16+ pub fn diff (
17+ before : & [ Token ] ,
18+ after : & [ Token ] ,
19+ removed : & mut [ bool ] ,
20+ added : & mut [ bool ] ,
2221 num_tokens : u32 ,
23- mut sink : S ,
24- ) -> S :: Out {
22+ ) {
2523 let mut histogram = Histogram :: new ( num_tokens) ;
26- let prefix = strip_common_prefix ( & mut before, & mut after) ;
27- strip_common_postfix ( & mut before, & mut after) ;
28- histogram. run ( before, prefix, after, prefix, & mut sink) ;
29- sink. finish ( )
24+ histogram. run ( before, after, removed, added) ;
3025}
3126
3227impl Histogram {
@@ -58,73 +53,49 @@ impl Histogram {
5853 fn run (
5954 & mut self ,
6055 mut before : & [ Token ] ,
61- mut before_off : u32 ,
6256 mut after : & [ Token ] ,
63- mut after_off : u32 ,
64- sink : & mut impl Sink ,
57+ mut removed : & mut [ bool ] ,
58+ mut added : & mut [ bool ] ,
6559 ) {
6660 loop {
6761 if before. is_empty ( ) {
68- if !after. is_empty ( ) {
69- sink. process_change (
70- before_off..before_off,
71- after_off..after_off + after. len ( ) as u32 ,
72- ) ;
73- }
62+ added. fill ( true ) ;
7463 return ;
7564 } else if after. is_empty ( ) {
76- sink. process_change (
77- before_off..before_off + before. len ( ) as u32 ,
78- after_off..after_off,
79- ) ;
65+ removed. fill ( true ) ;
8066 return ;
8167 }
8268
8369 self . populate ( before) ;
8470 match find_lcs ( before, after, self ) {
8571 // no lcs was found, which means that file1 and file2 have nothing in common
8672 Some ( lcs) if lcs. len == 0 => {
87- sink. process_change (
88- before_off..before_off + before. len ( ) as u32 ,
89- after_off..after_off + after. len ( ) as u32 ,
90- ) ;
73+ added. fill ( true ) ;
74+ removed. fill ( true ) ;
9175 return ;
9276 }
9377 Some ( lcs) => {
9478 self . run (
9579 & before[ ..lcs. before_start as usize ] ,
96- before_off,
9780 & after[ ..lcs. after_start as usize ] ,
98- after_off ,
99- sink ,
81+ & mut removed [ ..lcs . before_start as usize ] ,
82+ & mut added [ ..lcs . after_start as usize ] ,
10083 ) ;
10184
10285 // this is equivalent to (tail) recursion but implemented as a loop for efficiency reasons
10386 let before_end = lcs. before_start + lcs. len ;
10487 before = & before[ before_end as usize ..] ;
105- before_off += before_end;
88+ removed = & mut removed [ before_end as usize .. ] ;
10689
10790 let after_end = lcs. after_start + lcs. len ;
10891 after = & after[ after_end as usize ..] ;
109- after_off += after_end;
92+ added = & mut added [ after_end as usize .. ] ;
11093 }
11194 None => {
11295 // we are diffing two extremely large repetitive files
11396 // this is a worst case for histogram diff with O(N^2) performance
114- // fallback to myers to maintain linear time complexity
115- myers:: diff (
116- before,
117- after,
118- 0 , // not used by myers
119- |mut before : Range < u32 > , mut after : Range < u32 > | {
120- before. start += before_off;
121- before. end += before_off;
122- after. start += after_off;
123- after. end += after_off;
124- sink. process_change ( before, after)
125- } ,
126- false ,
127- ) ;
97+ // fallback to myers to maintain linear time complexity
98+ myers:: diff ( before, after, removed, added, false ) ;
12899 return ;
129100 }
130101 }
0 commit comments