@@ -584,12 +584,13 @@ let find_source ~config loc =
584584 | Some s -> s
585585 in
586586 log ~title: " find_source" " initial path: %S" initial_path;
587- let dir = Filename. dirname initial_path in
588- let dir =
587+ let canonical_dir_for_file file =
588+ let raw_dir = Filename. dirname file in
589589 match config.Mconfig. query.directory with
590- | "" -> dir
591- | cwd -> Misc. canonicalize_filename ~cwd dir
590+ | "" -> raw_dir
591+ | cwd -> Misc. canonicalize_filename ~cwd raw_dir
592592 in
593+ let dir = canonical_dir_for_file initial_path in
593594 match Utils. find_all_matches ~config ~with_fallback file with
594595 | [] ->
595596 log ~title: " find_source" " failed to find %S in source path (fallback = %b)"
@@ -608,22 +609,28 @@ let find_source ~config loc =
608609 ~title: (sprintf " find_source(%s)" filename)
609610 " multiple matches in the source path : %s"
610611 (String. concat ~sep: " , " files);
611- try
612+ let files_matching_digest =
612613 match File_switching. source_digest () with
613614 | None ->
614615 log ~title: " find_source"
615616 " ... no source digest available to select the right one" ;
616- raise Not_found
617+ []
617618 | Some digest ->
618619 log ~title: " find_source"
619620 " ... trying to use source digest to find the right one" ;
620621 log ~title: " find_source" " Source digest: %s" (Digest. to_hex digest);
621- Found
622- (List. find files ~f: (fun f ->
623- let fdigest = Digest. file f in
624- log ~title: " find_source" " %s (%s)" f (Digest. to_hex fdigest);
625- fdigest = digest))
626- with Not_found -> (
622+
623+ List. filter files ~f: (fun f ->
624+ let fdigest = Digest. file f in
625+ log ~title: " find_source" " %s (%s)" f (Digest. to_hex fdigest);
626+ fdigest = digest)
627+ in
628+ match files_matching_digest with
629+ | [ file ] ->
630+ log ~title: " find_source" " ... found exactly one file with matching digest" ;
631+ Found file
632+ | [] -> (
633+ log ~title: " find_source" " ... found no files with matching digest" ;
627634 log ~title: " find_source" " ... using heuristic to select the right one" ;
628635 log ~title: " find_source" " we are looking for a file named %s in %s" fname
629636 dir;
@@ -660,7 +667,66 @@ let find_source ~config loc =
660667 match lst with
661668 | (i1 , _ ) :: (i2 , _ ) :: _ when i1 = i2 -> Multiple_matches files
662669 | (_ , s ) :: _ -> Found s
663- | _ -> assert false ))
670+ | _ -> assert false )
671+ | files_matching_digest ->
672+ log ~title: " find_source" " ... found multiple files with matching digest" ;
673+ log ~title: " find_source"
674+ " ... using directory heuristic to choose the best one" ;
675+ (* Give each source file a score that represents how close its path is to the
676+ target path (the path of the build artifact) and then choose the source file
677+ with the highest score.
678+
679+ The score of a source file is the longest tail of the path of the its
680+ directory that is a subpath of the target path. This is premised on build
681+ systems liking to put artifacts in paths that are similar to the source path.
682+ i.e., dune may put the cmt for foo/bar/baz.ml in
683+ _build/default/foo/bar/.bar.objs/byte/bar__Baz.cmt, so we want to use that
684+ shared foo/bar in the path to disambiguate.
685+
686+ ex:
687+ source file: /a/b/c/d/e/f.ml
688+ target path: /a/b/c/_build/default/d/e/artifacts/f.cmi
689+ score: 2, because /a/b/c/d/e is the source file's directory, and d/e is
690+ the longest tail of it that is a subpath of the target path. *)
691+ let score_file source_file =
692+ (* This is technically quadratic, but
693+ a) most file paths are short
694+ b) in the common case, this is linear because common_prefix_len
695+ will usually fail on the first loop
696+ c) this isn't a hot path - this is only for the uncommon case where there are
697+ two identical files
698+ So the stars would need to align for this to cause performance problems *)
699+ let target_dir = dir in
700+ let source_dir = canonical_dir_for_file source_file in
701+ let target_dir_rev = target_dir |> Misc. split_path |> List. rev in
702+ let source_dir_rev = source_dir |> Misc. split_path |> List. rev in
703+ let rec common_prefix_len a b =
704+ match (a, b) with
705+ | [] , _ | _ , [] -> 0
706+ | a_hd :: a_tl , b_hd :: b_tl ->
707+ if String. equal a_hd b_hd then 1 + common_prefix_len a_tl b_tl
708+ else 0
709+ in
710+ let rec candidates = function
711+ | [] -> []
712+ | _ :: tl as curr -> curr :: candidates tl
713+ in
714+ candidates target_dir_rev
715+ |> List. map ~f: (common_prefix_len source_dir_rev)
716+ |> List. max_elt ~cmp: Int. compare
717+ |> Option. value ~default: 0
718+ in
719+ let files_matching_digest_with_scores =
720+ List. map files_matching_digest ~f: (fun file -> (file, score_file file))
721+ in
722+ (* get the max *)
723+ let best_file, _best_score =
724+ List. max_elt files_matching_digest_with_scores
725+ ~cmp: (fun (_ , a ) (_ , b ) -> Int. compare a b)
726+ |> Option. get
727+ (* theres at least one element, so this is never None *)
728+ in
729+ Found best_file)
664730
665731(* Well, that's just another hack.
666732 [find_source] doesn't like the "-o" option of the compiler. This hack handles
0 commit comments