From a20f83308aeb9b3acbffd419f7569a20893fefcc Mon Sep 17 00:00:00 2001 From: Liam Stevenson Date: Wed, 8 Jan 2025 14:32:47 -0500 Subject: [PATCH 1/6] Create test --- .../locate/distinguish-files.t/bin/dune | 3 ++ .../locate/distinguish-files.t/bin/main.ml | 5 ++++ .../locate/distinguish-files.t/dune-project | 1 + .../distinguish-files.t/lib_a/different.ml | 2 ++ .../locate/distinguish-files.t/lib_a/dune | 2 ++ .../locate/distinguish-files.t/lib_a/same.ml | 1 + .../distinguish-files.t/lib_b/different.ml | 2 ++ .../locate/distinguish-files.t/lib_b/dune | 2 ++ .../locate/distinguish-files.t/lib_b/same.ml | 1 + .../locate/distinguish-files.t/run.t | 28 +++++++++++++++++++ 10 files changed, 47 insertions(+) create mode 100644 tests/test-dirs/locate/distinguish-files.t/bin/dune create mode 100644 tests/test-dirs/locate/distinguish-files.t/bin/main.ml create mode 100644 tests/test-dirs/locate/distinguish-files.t/dune-project create mode 100644 tests/test-dirs/locate/distinguish-files.t/lib_a/different.ml create mode 100644 tests/test-dirs/locate/distinguish-files.t/lib_a/dune create mode 100644 tests/test-dirs/locate/distinguish-files.t/lib_a/same.ml create mode 100644 tests/test-dirs/locate/distinguish-files.t/lib_b/different.ml create mode 100644 tests/test-dirs/locate/distinguish-files.t/lib_b/dune create mode 100644 tests/test-dirs/locate/distinguish-files.t/lib_b/same.ml create mode 100644 tests/test-dirs/locate/distinguish-files.t/run.t diff --git a/tests/test-dirs/locate/distinguish-files.t/bin/dune b/tests/test-dirs/locate/distinguish-files.t/bin/dune new file mode 100644 index 0000000000..b5dd7d8664 --- /dev/null +++ b/tests/test-dirs/locate/distinguish-files.t/bin/dune @@ -0,0 +1,3 @@ +(executable + (name main) + (libraries lib_a lib_b)) diff --git a/tests/test-dirs/locate/distinguish-files.t/bin/main.ml b/tests/test-dirs/locate/distinguish-files.t/bin/main.ml new file mode 100644 index 0000000000..ccbabed03b --- /dev/null +++ b/tests/test-dirs/locate/distinguish-files.t/bin/main.ml @@ -0,0 +1,5 @@ +let _ = Lib_a.Same.foo +let _ = Lib_b.Same.foo + +let _ = Lib_a.Different.foo +let _ = Lib_b.Different.foo diff --git a/tests/test-dirs/locate/distinguish-files.t/dune-project b/tests/test-dirs/locate/distinguish-files.t/dune-project new file mode 100644 index 0000000000..929c696e56 --- /dev/null +++ b/tests/test-dirs/locate/distinguish-files.t/dune-project @@ -0,0 +1 @@ +(lang dune 2.0) diff --git a/tests/test-dirs/locate/distinguish-files.t/lib_a/different.ml b/tests/test-dirs/locate/distinguish-files.t/lib_a/different.ml new file mode 100644 index 0000000000..bd9ffd45ba --- /dev/null +++ b/tests/test-dirs/locate/distinguish-files.t/lib_a/different.ml @@ -0,0 +1,2 @@ +(* AAAAA *) +let foo = 10 diff --git a/tests/test-dirs/locate/distinguish-files.t/lib_a/dune b/tests/test-dirs/locate/distinguish-files.t/lib_a/dune new file mode 100644 index 0000000000..93aa7c4ccc --- /dev/null +++ b/tests/test-dirs/locate/distinguish-files.t/lib_a/dune @@ -0,0 +1,2 @@ +(library + (name lib_a)) diff --git a/tests/test-dirs/locate/distinguish-files.t/lib_a/same.ml b/tests/test-dirs/locate/distinguish-files.t/lib_a/same.ml new file mode 100644 index 0000000000..5b5ecbfc21 --- /dev/null +++ b/tests/test-dirs/locate/distinguish-files.t/lib_a/same.ml @@ -0,0 +1 @@ +let foo = 10 diff --git a/tests/test-dirs/locate/distinguish-files.t/lib_b/different.ml b/tests/test-dirs/locate/distinguish-files.t/lib_b/different.ml new file mode 100644 index 0000000000..d0358288d7 --- /dev/null +++ b/tests/test-dirs/locate/distinguish-files.t/lib_b/different.ml @@ -0,0 +1,2 @@ +(* BBBBB *) +let foo = 10 diff --git a/tests/test-dirs/locate/distinguish-files.t/lib_b/dune b/tests/test-dirs/locate/distinguish-files.t/lib_b/dune new file mode 100644 index 0000000000..4f6662e961 --- /dev/null +++ b/tests/test-dirs/locate/distinguish-files.t/lib_b/dune @@ -0,0 +1,2 @@ +(library + (name lib_b)) diff --git a/tests/test-dirs/locate/distinguish-files.t/lib_b/same.ml b/tests/test-dirs/locate/distinguish-files.t/lib_b/same.ml new file mode 100644 index 0000000000..5b5ecbfc21 --- /dev/null +++ b/tests/test-dirs/locate/distinguish-files.t/lib_b/same.ml @@ -0,0 +1 @@ +let foo = 10 diff --git a/tests/test-dirs/locate/distinguish-files.t/run.t b/tests/test-dirs/locate/distinguish-files.t/run.t new file mode 100644 index 0000000000..3b2669f635 --- /dev/null +++ b/tests/test-dirs/locate/distinguish-files.t/run.t @@ -0,0 +1,28 @@ +Merlin can distinguish between two files with the same name and contents + + $ dune build @check + +Part 1: Test that two files with the same name but different contents can be distinguished + +Get Lib_a.Different.foo + $ $MERLIN single locate -position 4:26 -filename bin/main.ml < bin/main.ml \ + > | jq .value.file -r + $TESTCASE_ROOT/lib_a/different.ml + +Get Lib_b.Different.foo + $ $MERLIN single locate -position 5:26 -filename bin/main.ml < bin/main.ml \ + > | jq .value.file -r + $TESTCASE_ROOT/lib_b/different.ml + +Part 2: Test that two files with the same name and same contents can be distinguished + +TODO: fix this case +Get Lib_a.Same.foo + $ $MERLIN single locate -position 1:22 -filename bin/main.ml < bin/main.ml \ + > | jq .value.file -r + $TESTCASE_ROOT/lib_b/same.ml + +Get Lib_b.Same.foo + $ $MERLIN single locate -position 2:22 -filename bin/main.ml < bin/main.ml \ + > | jq .value.file -r + $TESTCASE_ROOT/lib_b/same.ml From a47809818fe0a677a887165e92353575156c6866 Mon Sep 17 00:00:00 2001 From: Liam Stevenson Date: Wed, 8 Jan 2025 17:25:49 -0500 Subject: [PATCH 2/6] Fix bug --- src/analysis/locate.ml | 171 ++++++++++++------ src/utils/misc.ml | 12 +- src/utils/misc.mli | 13 +- src/utils/std.ml | 12 ++ .../locate/distinguish-files.t/run.t | 3 +- 5 files changed, 148 insertions(+), 63 deletions(-) diff --git a/src/analysis/locate.ml b/src/analysis/locate.ml index 8c6294e3ef..d7baf28130 100644 --- a/src/analysis/locate.ml +++ b/src/analysis/locate.ml @@ -386,12 +386,13 @@ let find_source ~config loc = | Some s -> s in log ~title:"find_source" "initial path: %S" initial_path; - let dir = Filename.dirname initial_path in - let dir = + let canonical_dir_for_file file = + let raw_dir = Filename.dirname file in match config.Mconfig.query.directory with - | "" -> dir - | cwd -> Misc.canonicalize_filename ~cwd dir + | "" -> raw_dir + | cwd -> Misc.canonicalize_filename ~cwd raw_dir in + let dir = canonical_dir_for_file initial_path in match Utils.find_all_matches ~config ~with_fallback file with | [] -> log ~title:"find_source" "failed to find %S in source path (fallback = %b)" @@ -410,59 +411,125 @@ let find_source ~config loc = ~title:(sprintf "find_source(%s)" filename) "multiple matches in the source path : %s" (String.concat ~sep:" , " files); - try - match File_switching.source_digest () with - | None -> - log ~title:"find_source" - "... no source digest available to select the right one"; - raise Not_found - | Some digest -> - log ~title:"find_source" - "... trying to use source digest to find the right one"; - log ~title:"find_source" "Source digest: %s" (Digest.to_hex digest); - Found - (List.find files ~f:(fun f -> - let fdigest = Digest.file f in - log ~title:"find_source" " %s (%s)" f (Digest.to_hex fdigest); - fdigest = digest)) - with Not_found -> ( - log ~title:"find_source" "... using heuristic to select the right one"; - log ~title:"find_source" "we are looking for a file named %s in %s" fname - dir; - let rev = String.reverse (Misc.canonicalize_filename ~cwd:dir fname) in - let lst = - List.map files ~f:(fun path -> - let path' = String.reverse path in - let priority = - (String.common_prefix_len rev path' * 2) - + if Preferences.is_preferred path then 1 else 0 - in - (priority, path)) + match File_switching.source_digest () with + | None -> + log ~title:"find_source" + "... no source digest available to select the right one"; + raise Not_found + | Some digest -> ( + log ~title:"find_source" + "... trying to use source digest to find the right one"; + log ~title:"find_source" "Source digest: %s" (Digest.to_hex digest); + let files_matching_digest = + List.filter files ~f:(fun f -> + let fdigest = Digest.file f in + log ~title:"find_source" " %s (%s)" f (Digest.to_hex fdigest); + fdigest = digest) in - let lst = - (* TODO: remove duplicates in [source_path] instead of using + match files_matching_digest with + | [ file ] -> + log ~title:"find_source" + "... found exactly one file with matching digest"; + Found file + | [] -> ( + log ~title:"find_source" "... found no files with matching digest"; + log ~title:"find_source" "... using heuristic to select the right one"; + log ~title:"find_source" "we are looking for a file named %s in %s" + fname dir; + let rev = String.reverse (Misc.canonicalize_filename ~cwd:dir fname) in + let lst = + List.map files ~f:(fun path -> + let path' = String.reverse path in + let priority = + (String.common_prefix_len rev path' * 2) + + if Preferences.is_preferred path then 1 else 0 + in + (priority, path)) + in + let lst = + (* TODO: remove duplicates in [source_path] instead of using [sort_uniq] here. *) - List.sort_uniq - ~cmp:(fun ((i : int), s) ((j : int), t) -> - let tmp = compare j i in - if tmp <> 0 then tmp - else - match compare s t with - | 0 -> 0 - | n -> ( - (* Check if we are referring to the same files. + List.sort_uniq + ~cmp:(fun ((i : int), s) ((j : int), t) -> + let tmp = compare j i in + if tmp <> 0 then tmp + else + match compare s t with + | 0 -> 0 + | n -> ( + (* Check if we are referring to the same files. Especially useful on OSX case-insensitive FS. FIXME: May be able handle symlinks and non-existing files, CHECK *) - match (File_id.get s, File_id.get t) with - | s', t' when File_id.check s' t' -> 0 - | _ -> n)) - lst - in - match lst with - | (i1, _) :: (i2, _) :: _ when i1 = i2 -> Multiple_matches files - | (_, s) :: _ -> Found s - | _ -> assert false)) + match (File_id.get s, File_id.get t) with + | s', t' when File_id.check s' t' -> 0 + | _ -> n)) + lst + in + match lst with + | (i1, _) :: (i2, _) :: _ when i1 = i2 -> Multiple_matches files + | (_, s) :: _ -> Found s + | _ -> assert false) + | files_matching_digest -> + log ~title:"find_source" "... found multiple files with matching digest"; + log ~title:"find_source" + "... using directory heuristic to choose the best one"; + (* Give each source file a score that represents how close its path is to the + target path (the path of the build artifact) and then choose the source file + with the highest score. + + The score of a source file is the longest tail of the path of the its + directory that is a subpath of the target path. This is premised on build + systems liking to put artifacts in paths that are similar to the source path. + i.e., dune may put the cmt for foo/bar/baz.ml in + _build/default/foo/bar/.bar.objs/byte/bar__Baz.cmt, so we want to use that + shared foo/bar in the path to disambiguate. + + ex: + source file: /a/b/c/d/e/f.ml + target path: /a/b/c/_build/default/d/e/artifacts/f.cmi + score: 2, because /a/b/c/d/e is the source file's directory, and d/e is + the longest tail of it that is a subpath of the target path. *) + let score_file source_file = + (* This is technically quadratic, but + a) most file paths are short + b) in the common case, this is linear because common_prefix_len + will usually fail on the first loop + c) this isn't a hot path - this is only for the uncommon case where there are + two identical files + So the stars would need to align for this to cause performance problems *) + let target_dir = dir in + let source_dir = canonical_dir_for_file source_file in + let target_dir_rev = target_dir |> Misc.split_path |> List.rev in + let source_dir_rev = source_dir |> Misc.split_path |> List.rev in + let rec common_prefix_len a b = + match (a, b) with + | [], _ | _, [] -> 0 + | a_hd :: a_tl, b_hd :: b_tl -> + if String.equal a_hd b_hd then 1 + common_prefix_len a_tl b_tl + else 0 + in + let rec candidates = function + | [] -> [] + | _ :: tl as curr -> curr :: candidates tl + in + candidates target_dir_rev + |> List.map ~f:(common_prefix_len source_dir_rev) + |> List.max_elt ~cmp:Int.compare + |> Option.value ~default:0 + in + let files_matching_digest_with_scores = + List.map files_matching_digest ~f:(fun file -> + (file, score_file file)) + in + (* get the max *) + let best_file, _best_score = + List.max_elt files_matching_digest_with_scores + ~cmp:(fun (_, a) (_, b) -> Int.compare a b) + |> Option.get + (* theres at least one element, so this is never None *) + in + Found best_file)) (* Well, that's just another hack. [find_source] doesn't like the "-o" option of the compiler. This hack handles diff --git a/src/utils/misc.ml b/src/utils/misc.ml index fd7b3b27a8..2516d8df81 100644 --- a/src/utils/misc.ml +++ b/src/utils/misc.ml @@ -356,7 +356,7 @@ let remove_file filename = then Sys.remove filename with Sys_error _msg -> () -let rec split_path path acc = +let rec split_path_and_prepend path acc = match Filename.dirname path with | dir when dir = path -> let is_letter c = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') in @@ -373,7 +373,9 @@ let rec split_path path acc = else dir in dir :: acc - | dir -> split_path dir (Filename.basename path :: acc) + | dir -> split_path_and_prepend dir (Filename.basename path :: acc) + +let split_path path = split_path_and_prepend path [] (* Deal with case insensitive FS *) @@ -412,9 +414,9 @@ let exact_file_exists ~dirname ~basename = let canonicalize_filename ?cwd path = let parts = - match split_path path [] with + match split_path path with | dot :: rest when dot = Filename.current_dir_name -> - split_path (match cwd with None -> Sys.getcwd () | Some c -> c) rest + split_path_and_prepend (match cwd with None -> Sys.getcwd () | Some c -> c) rest | parts -> parts in let goup path = function @@ -461,7 +463,7 @@ let rec expand_glob ~filter acc root = function Array.fold_left process acc items let expand_glob ?(filter=fun _ -> true) path acc = - match split_path path [] with + match split_path path with | [] -> acc | root :: subs -> let patterns = List.map ~f:Glob.compile_pattern subs in diff --git a/src/utils/misc.mli b/src/utils/misc.mli index 9c560d2dc9..0708802c2e 100644 --- a/src/utils/misc.mli +++ b/src/utils/misc.mli @@ -129,11 +129,16 @@ val canonicalize_filename : ?cwd:string -> string -> string val expand_glob : ?filter:(string -> bool) -> string -> string list -> string list (* [expand_glob ~filter pattern acc] adds all filenames matching [pattern] and satistfying the [filter] predicate to [acc]*) -val split_path : string -> string list -> string list - (* [split_path path tail] prepends all components of [path] to [tail], +val split_path : string -> string list + (* [split_path path] returns the components of [path], + including implicit "." if path is not absolute. + [split_path "a/b/c"] = ["."; "a"; "b"; "c"] + [split_path "/a/b/c"] = ["/"; "a"; "b"; "c"] + FIXME: explain windows behavior + *) +val split_path_and_prepend : string -> string list -> string list + (* [split_path_and_prepend path tail] prepends all components of [path] to [tail], including implicit "." if path is not absolute. - [split_path "a/b/c" []] = ["."; "a"; "b"; "c"] - [split_path "/a/b/c" []] = ["/"; "a"; "b"; "c"] FIXME: explain windows behavior *) diff --git a/src/utils/std.ml b/src/utils/std.ml index 0457d87a3d..9bcb784acf 100644 --- a/src/utils/std.ml +++ b/src/utils/std.ml @@ -290,6 +290,18 @@ module List = struct let sort_uniq ~cmp l = dedup_adjacent ~cmp (sort ~cmp l) let print f () l = "[ " ^ String.concat "; " (List.map (f ()) l) ^ " ]" + + let max_elt list ~cmp = + fold_left list ~init:None ~f:(fun acc elt -> + match acc with + | None -> Some elt + | Some max -> if cmp max elt < 0 then Some elt else acc) + + let min_elt list ~cmp = + fold_left list ~init:None ~f:(fun acc elt -> + match acc with + | None -> Some elt + | Some min -> if cmp min elt > 0 then Some elt else acc) end module Option = struct diff --git a/tests/test-dirs/locate/distinguish-files.t/run.t b/tests/test-dirs/locate/distinguish-files.t/run.t index 3b2669f635..11da27d200 100644 --- a/tests/test-dirs/locate/distinguish-files.t/run.t +++ b/tests/test-dirs/locate/distinguish-files.t/run.t @@ -16,11 +16,10 @@ Get Lib_b.Different.foo Part 2: Test that two files with the same name and same contents can be distinguished -TODO: fix this case Get Lib_a.Same.foo $ $MERLIN single locate -position 1:22 -filename bin/main.ml < bin/main.ml \ > | jq .value.file -r - $TESTCASE_ROOT/lib_b/same.ml + $TESTCASE_ROOT/lib_a/same.ml Get Lib_b.Same.foo $ $MERLIN single locate -position 2:22 -filename bin/main.ml < bin/main.ml \ From 6cbbd0c22775aa28eb19dd74116224b2280ea39a Mon Sep 17 00:00:00 2001 From: Liam Stevenson Date: Fri, 31 Jan 2025 14:06:26 -0500 Subject: [PATCH 3/6] Update changelog --- CHANGES.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index e38c3afe97..4e95be4384 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -4,6 +4,8 @@ unreleased + merlin library - Expose utilities to manipulate typed-holes in `Merlin_analysis.Typed_hole` (#1888) + - `locate` can now disambiguate between files with identical names and contents + (#1882) merlin 5.4.1 ============ From 7d41a6017056e3069e1b91ef0a4b3c6e165a6f2f Mon Sep 17 00:00:00 2001 From: Liam Stevenson Date: Wed, 8 Jan 2025 18:23:55 -0500 Subject: [PATCH 4/6] Disable test on windows --- tests/test-dirs/locate/dune | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test-dirs/locate/dune b/tests/test-dirs/locate/dune index 2f1e864e31..9c42ba8280 100755 --- a/tests/test-dirs/locate/dune +++ b/tests/test-dirs/locate/dune @@ -2,7 +2,7 @@ (applies_to looping-substitution mutually-recursive partial-cmt includes issue802 issue845 issue1848 issue1199 issue1524 sig-substs l-413-features module-aliases locate-constrs without-implem without-sig module-decl-aliases - in-implicit-trans-dep) + in-implicit-trans-dep distinguish-files.t) (enabled_if (<> %{os_type} Win32))) From 986ebd0e859d03cd0bcbae7359657015bfd5f2b6 Mon Sep 17 00:00:00 2001 From: Liam Stevenson Date: Fri, 31 Jan 2025 14:46:17 -0500 Subject: [PATCH 5/6] Fix uncaught exception --- src/analysis/locate.ml | 173 ++++++++++++++++++++--------------------- 1 file changed, 86 insertions(+), 87 deletions(-) diff --git a/src/analysis/locate.ml b/src/analysis/locate.ml index d7baf28130..3ce56b3b67 100644 --- a/src/analysis/locate.ml +++ b/src/analysis/locate.ml @@ -411,70 +411,70 @@ let find_source ~config loc = ~title:(sprintf "find_source(%s)" filename) "multiple matches in the source path : %s" (String.concat ~sep:" , " files); - match File_switching.source_digest () with - | None -> - log ~title:"find_source" - "... no source digest available to select the right one"; - raise Not_found - | Some digest -> ( - log ~title:"find_source" - "... trying to use source digest to find the right one"; - log ~title:"find_source" "Source digest: %s" (Digest.to_hex digest); - let files_matching_digest = + let files_matching_digest = + match File_switching.source_digest () with + | None -> + log ~title:"find_source" + "... no source digest available to select the right one"; + [] + | Some digest -> + log ~title:"find_source" + "... trying to use source digest to find the right one"; + log ~title:"find_source" "Source digest: %s" (Digest.to_hex digest); + List.filter files ~f:(fun f -> let fdigest = Digest.file f in log ~title:"find_source" " %s (%s)" f (Digest.to_hex fdigest); fdigest = digest) + in + match files_matching_digest with + | [ file ] -> + log ~title:"find_source" "... found exactly one file with matching digest"; + Found file + | [] -> ( + log ~title:"find_source" "... found no files with matching digest"; + log ~title:"find_source" "... using heuristic to select the right one"; + log ~title:"find_source" "we are looking for a file named %s in %s" fname + dir; + let rev = String.reverse (Misc.canonicalize_filename ~cwd:dir fname) in + let lst = + List.map files ~f:(fun path -> + let path' = String.reverse path in + let priority = + (String.common_prefix_len rev path' * 2) + + if Preferences.is_preferred path then 1 else 0 + in + (priority, path)) in - match files_matching_digest with - | [ file ] -> - log ~title:"find_source" - "... found exactly one file with matching digest"; - Found file - | [] -> ( - log ~title:"find_source" "... found no files with matching digest"; - log ~title:"find_source" "... using heuristic to select the right one"; - log ~title:"find_source" "we are looking for a file named %s in %s" - fname dir; - let rev = String.reverse (Misc.canonicalize_filename ~cwd:dir fname) in - let lst = - List.map files ~f:(fun path -> - let path' = String.reverse path in - let priority = - (String.common_prefix_len rev path' * 2) - + if Preferences.is_preferred path then 1 else 0 - in - (priority, path)) - in - let lst = - (* TODO: remove duplicates in [source_path] instead of using + let lst = + (* TODO: remove duplicates in [source_path] instead of using [sort_uniq] here. *) - List.sort_uniq - ~cmp:(fun ((i : int), s) ((j : int), t) -> - let tmp = compare j i in - if tmp <> 0 then tmp - else - match compare s t with - | 0 -> 0 - | n -> ( - (* Check if we are referring to the same files. + List.sort_uniq + ~cmp:(fun ((i : int), s) ((j : int), t) -> + let tmp = compare j i in + if tmp <> 0 then tmp + else + match compare s t with + | 0 -> 0 + | n -> ( + (* Check if we are referring to the same files. Especially useful on OSX case-insensitive FS. FIXME: May be able handle symlinks and non-existing files, CHECK *) - match (File_id.get s, File_id.get t) with - | s', t' when File_id.check s' t' -> 0 - | _ -> n)) - lst - in - match lst with - | (i1, _) :: (i2, _) :: _ when i1 = i2 -> Multiple_matches files - | (_, s) :: _ -> Found s - | _ -> assert false) - | files_matching_digest -> - log ~title:"find_source" "... found multiple files with matching digest"; - log ~title:"find_source" - "... using directory heuristic to choose the best one"; - (* Give each source file a score that represents how close its path is to the + match (File_id.get s, File_id.get t) with + | s', t' when File_id.check s' t' -> 0 + | _ -> n)) + lst + in + match lst with + | (i1, _) :: (i2, _) :: _ when i1 = i2 -> Multiple_matches files + | (_, s) :: _ -> Found s + | _ -> assert false) + | files_matching_digest -> + log ~title:"find_source" "... found multiple files with matching digest"; + log ~title:"find_source" + "... using directory heuristic to choose the best one"; + (* Give each source file a score that represents how close its path is to the target path (the path of the build artifact) and then choose the source file with the highest score. @@ -490,46 +490,45 @@ let find_source ~config loc = target path: /a/b/c/_build/default/d/e/artifacts/f.cmi score: 2, because /a/b/c/d/e is the source file's directory, and d/e is the longest tail of it that is a subpath of the target path. *) - let score_file source_file = - (* This is technically quadratic, but + let score_file source_file = + (* This is technically quadratic, but a) most file paths are short b) in the common case, this is linear because common_prefix_len will usually fail on the first loop c) this isn't a hot path - this is only for the uncommon case where there are two identical files So the stars would need to align for this to cause performance problems *) - let target_dir = dir in - let source_dir = canonical_dir_for_file source_file in - let target_dir_rev = target_dir |> Misc.split_path |> List.rev in - let source_dir_rev = source_dir |> Misc.split_path |> List.rev in - let rec common_prefix_len a b = - match (a, b) with - | [], _ | _, [] -> 0 - | a_hd :: a_tl, b_hd :: b_tl -> - if String.equal a_hd b_hd then 1 + common_prefix_len a_tl b_tl - else 0 - in - let rec candidates = function - | [] -> [] - | _ :: tl as curr -> curr :: candidates tl - in - candidates target_dir_rev - |> List.map ~f:(common_prefix_len source_dir_rev) - |> List.max_elt ~cmp:Int.compare - |> Option.value ~default:0 + let target_dir = dir in + let source_dir = canonical_dir_for_file source_file in + let target_dir_rev = target_dir |> Misc.split_path |> List.rev in + let source_dir_rev = source_dir |> Misc.split_path |> List.rev in + let rec common_prefix_len a b = + match (a, b) with + | [], _ | _, [] -> 0 + | a_hd :: a_tl, b_hd :: b_tl -> + if String.equal a_hd b_hd then 1 + common_prefix_len a_tl b_tl + else 0 in - let files_matching_digest_with_scores = - List.map files_matching_digest ~f:(fun file -> - (file, score_file file)) + let rec candidates = function + | [] -> [] + | _ :: tl as curr -> curr :: candidates tl in - (* get the max *) - let best_file, _best_score = - List.max_elt files_matching_digest_with_scores - ~cmp:(fun (_, a) (_, b) -> Int.compare a b) - |> Option.get - (* theres at least one element, so this is never None *) - in - Found best_file)) + candidates target_dir_rev + |> List.map ~f:(common_prefix_len source_dir_rev) + |> List.max_elt ~cmp:Int.compare + |> Option.value ~default:0 + in + let files_matching_digest_with_scores = + List.map files_matching_digest ~f:(fun file -> (file, score_file file)) + in + (* get the max *) + let best_file, _best_score = + List.max_elt files_matching_digest_with_scores + ~cmp:(fun (_, a) (_, b) -> Int.compare a b) + |> Option.get + (* theres at least one element, so this is never None *) + in + Found best_file) (* Well, that's just another hack. [find_source] doesn't like the "-o" option of the compiler. This hack handles From 915eff79cfc718970404a904627de345c6dbedad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ulysse=20G=C3=A9rard?= Date: Mon, 3 Feb 2025 10:11:59 +0100 Subject: [PATCH 6/6] Remove `.t` extension from cram config --- tests/test-dirs/locate/dune | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test-dirs/locate/dune b/tests/test-dirs/locate/dune index 9c42ba8280..3121931a35 100755 --- a/tests/test-dirs/locate/dune +++ b/tests/test-dirs/locate/dune @@ -2,7 +2,7 @@ (applies_to looping-substitution mutually-recursive partial-cmt includes issue802 issue845 issue1848 issue1199 issue1524 sig-substs l-413-features module-aliases locate-constrs without-implem without-sig module-decl-aliases - in-implicit-trans-dep distinguish-files.t) + in-implicit-trans-dep distinguish-files) (enabled_if (<> %{os_type} Win32)))