Skip to content

Commit d0a4a19

Browse files
liam923voodoos
andauthored
Distinguish identical files (#130)
* Create test * Fix bug * Disable test on windows * Fix uncaught exception * Remove `.t` extension from cram config * Disable test --------- Co-authored-by: Ulysse Gérard <thevoodoos@gmail.com>
1 parent 942d289 commit d0a4a19

File tree

15 files changed

+156
-24
lines changed

15 files changed

+156
-24
lines changed

src/analysis/locate.ml

Lines changed: 79 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -584,12 +584,13 @@ let find_source ~config loc =
584584
| Some s -> s
585585
in
586586
log ~title:"find_source" "initial path: %S" initial_path;
587-
let dir = Filename.dirname initial_path in
588-
let dir =
587+
let canonical_dir_for_file file =
588+
let raw_dir = Filename.dirname file in
589589
match config.Mconfig.query.directory with
590-
| "" -> dir
591-
| cwd -> Misc.canonicalize_filename ~cwd dir
590+
| "" -> raw_dir
591+
| cwd -> Misc.canonicalize_filename ~cwd raw_dir
592592
in
593+
let dir = canonical_dir_for_file initial_path in
593594
match Utils.find_all_matches ~config ~with_fallback file with
594595
| [] ->
595596
log ~title:"find_source" "failed to find %S in source path (fallback = %b)"
@@ -608,22 +609,28 @@ let find_source ~config loc =
608609
~title:(sprintf "find_source(%s)" filename)
609610
"multiple matches in the source path : %s"
610611
(String.concat ~sep:" , " files);
611-
try
612+
let files_matching_digest =
612613
match File_switching.source_digest () with
613614
| None ->
614615
log ~title:"find_source"
615616
"... no source digest available to select the right one";
616-
raise Not_found
617+
[]
617618
| Some digest ->
618619
log ~title:"find_source"
619620
"... trying to use source digest to find the right one";
620621
log ~title:"find_source" "Source digest: %s" (Digest.to_hex digest);
621-
Found
622-
(List.find files ~f:(fun f ->
623-
let fdigest = Digest.file f in
624-
log ~title:"find_source" " %s (%s)" f (Digest.to_hex fdigest);
625-
fdigest = digest))
626-
with Not_found -> (
622+
623+
List.filter files ~f:(fun f ->
624+
let fdigest = Digest.file f in
625+
log ~title:"find_source" " %s (%s)" f (Digest.to_hex fdigest);
626+
fdigest = digest)
627+
in
628+
match files_matching_digest with
629+
| [ file ] ->
630+
log ~title:"find_source" "... found exactly one file with matching digest";
631+
Found file
632+
| [] -> (
633+
log ~title:"find_source" "... found no files with matching digest";
627634
log ~title:"find_source" "... using heuristic to select the right one";
628635
log ~title:"find_source" "we are looking for a file named %s in %s" fname
629636
dir;
@@ -660,7 +667,66 @@ let find_source ~config loc =
660667
match lst with
661668
| (i1, _) :: (i2, _) :: _ when i1 = i2 -> Multiple_matches files
662669
| (_, s) :: _ -> Found s
663-
| _ -> assert false))
670+
| _ -> assert false)
671+
| files_matching_digest ->
672+
log ~title:"find_source" "... found multiple files with matching digest";
673+
log ~title:"find_source"
674+
"... using directory heuristic to choose the best one";
675+
(* Give each source file a score that represents how close its path is to the
676+
target path (the path of the build artifact) and then choose the source file
677+
with the highest score.
678+
679+
The score of a source file is the longest tail of the path of the its
680+
directory that is a subpath of the target path. This is premised on build
681+
systems liking to put artifacts in paths that are similar to the source path.
682+
i.e., dune may put the cmt for foo/bar/baz.ml in
683+
_build/default/foo/bar/.bar.objs/byte/bar__Baz.cmt, so we want to use that
684+
shared foo/bar in the path to disambiguate.
685+
686+
ex:
687+
source file: /a/b/c/d/e/f.ml
688+
target path: /a/b/c/_build/default/d/e/artifacts/f.cmi
689+
score: 2, because /a/b/c/d/e is the source file's directory, and d/e is
690+
the longest tail of it that is a subpath of the target path. *)
691+
let score_file source_file =
692+
(* This is technically quadratic, but
693+
a) most file paths are short
694+
b) in the common case, this is linear because common_prefix_len
695+
will usually fail on the first loop
696+
c) this isn't a hot path - this is only for the uncommon case where there are
697+
two identical files
698+
So the stars would need to align for this to cause performance problems *)
699+
let target_dir = dir in
700+
let source_dir = canonical_dir_for_file source_file in
701+
let target_dir_rev = target_dir |> Misc.split_path |> List.rev in
702+
let source_dir_rev = source_dir |> Misc.split_path |> List.rev in
703+
let rec common_prefix_len a b =
704+
match (a, b) with
705+
| [], _ | _, [] -> 0
706+
| a_hd :: a_tl, b_hd :: b_tl ->
707+
if String.equal a_hd b_hd then 1 + common_prefix_len a_tl b_tl
708+
else 0
709+
in
710+
let rec candidates = function
711+
| [] -> []
712+
| _ :: tl as curr -> curr :: candidates tl
713+
in
714+
candidates target_dir_rev
715+
|> List.map ~f:(common_prefix_len source_dir_rev)
716+
|> List.max_elt ~cmp:Int.compare
717+
|> Option.value ~default:0
718+
in
719+
let files_matching_digest_with_scores =
720+
List.map files_matching_digest ~f:(fun file -> (file, score_file file))
721+
in
722+
(* get the max *)
723+
let best_file, _best_score =
724+
List.max_elt files_matching_digest_with_scores
725+
~cmp:(fun (_, a) (_, b) -> Int.compare a b)
726+
|> Option.get
727+
(* theres at least one element, so this is never None *)
728+
in
729+
Found best_file)
664730

665731
(* Well, that's just another hack.
666732
[find_source] doesn't like the "-o" option of the compiler. This hack handles

src/utils/misc.ml

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ let remove_file filename =
125125
then Sys.remove filename
126126
with Sys_error _msg -> ()
127127

128-
let rec split_path path acc =
128+
let rec split_path_and_prepend path acc =
129129
match Filename.dirname path with
130130
| dir when dir = path ->
131131
let is_letter c = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') in
@@ -142,7 +142,9 @@ let rec split_path path acc =
142142
else dir
143143
in
144144
dir :: acc
145-
| dir -> split_path dir (Filename.basename path :: acc)
145+
| dir -> split_path_and_prepend dir (Filename.basename path :: acc)
146+
147+
let split_path path = split_path_and_prepend path []
146148

147149
(* Deal with case insensitive FS *)
148150

@@ -181,9 +183,9 @@ let exact_file_exists ~dirname ~basename =
181183

182184
let canonicalize_filename ?cwd path =
183185
let parts =
184-
match split_path path [] with
186+
match split_path path with
185187
| dot :: rest when dot = Filename.current_dir_name ->
186-
split_path (match cwd with None -> Sys.getcwd () | Some c -> c) rest
188+
split_path_and_prepend (match cwd with None -> Sys.getcwd () | Some c -> c) rest
187189
| parts -> parts
188190
in
189191
let goup path = function
@@ -230,7 +232,7 @@ let rec expand_glob ~filter acc root = function
230232
Array.fold_left process acc items
231233

232234
let expand_glob ?(filter=fun _ -> true) path acc =
233-
match split_path path [] with
235+
match split_path path with
234236
| [] -> acc
235237
| root :: subs ->
236238
let patterns = List.map ~f:Glob.compile_pattern subs in

src/utils/misc.mli

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -128,11 +128,16 @@ val canonicalize_filename : ?cwd:string -> string -> string
128128
val expand_glob : ?filter:(string -> bool) -> string -> string list -> string list
129129
(* [expand_glob ~filter pattern acc] adds all filenames matching
130130
[pattern] and satistfying the [filter] predicate to [acc]*)
131-
val split_path : string -> string list -> string list
132-
(* [split_path path tail] prepends all components of [path] to [tail],
131+
val split_path : string -> string list
132+
(* [split_path path] returns the components of [path],
133+
including implicit "." if path is not absolute.
134+
[split_path "a/b/c"] = ["."; "a"; "b"; "c"]
135+
[split_path "/a/b/c"] = ["/"; "a"; "b"; "c"]
136+
FIXME: explain windows behavior
137+
*)
138+
val split_path_and_prepend : string -> string list -> string list
139+
(* [split_path_and_prepend path tail] prepends all components of [path] to [tail],
133140
including implicit "." if path is not absolute.
134-
[split_path "a/b/c" []] = ["."; "a"; "b"; "c"]
135-
[split_path "/a/b/c" []] = ["/"; "a"; "b"; "c"]
136141
FIXME: explain windows behavior
137142
*)
138143

src/utils/std.ml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -290,6 +290,18 @@ module List = struct
290290
let sort_uniq ~cmp l = dedup_adjacent ~cmp (sort ~cmp l)
291291

292292
let print f () l = "[ " ^ String.concat "; " (List.map (f ()) l) ^ " ]"
293+
294+
let max_elt list ~cmp =
295+
fold_left list ~init:None ~f:(fun acc elt ->
296+
match acc with
297+
| None -> Some elt
298+
| Some max -> if cmp max elt < 0 then Some elt else acc)
299+
300+
let min_elt list ~cmp =
301+
fold_left list ~init:None ~f:(fun acc elt ->
302+
match acc with
303+
| None -> Some elt
304+
| Some min -> if cmp min elt > 0 then Some elt else acc)
293305
end
294306

295307
module Option = struct
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
(executable
2+
(name main)
3+
(libraries lib_a lib_b))
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
let _ = Lib_a.Same.foo
2+
let _ = Lib_b.Same.foo
3+
4+
let _ = Lib_a.Different.foo
5+
let _ = Lib_b.Different.foo
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
(lang dune 2.0)
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
(* AAAAA *)
2+
let foo = 10
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
(library
2+
(name lib_a))
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
let foo = 10

0 commit comments

Comments
 (0)