From 61a37f98dc15a87334116441eb188cb962ca6216 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ulysse=20G=C3=A9rard?= Date: Wed, 9 Apr 2025 10:47:06 -0400 Subject: [PATCH 1/7] Downstream: Fix occurrences when the definition's source is hidden (ocaml/merlin#1865) --- CHANGES.md | 7 +++ src/analysis/locate.ml | 29 ++++++++---- src/analysis/locate.mli | 7 ++- src/analysis/occurrences.ml | 6 ++- src/frontend/query_commands.ml | 6 +-- tests/test-dirs/occurrences/no-def-mli-only.t | 47 +++++++++++++++++++ 6 files changed, 84 insertions(+), 18 deletions(-) create mode 100644 tests/test-dirs/occurrences/no-def-mli-only.t diff --git a/CHANGES.md b/CHANGES.md index 1fcd8ef1f..666769b91 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,10 @@ +unreleased +========== + + + merlin binary + - Fix occurrences not working when the definition comes from a hidden source + file (#1865) + merlin 5.2 ========== Thu Sep 26 18:48:42 CEST 2024 diff --git a/src/analysis/locate.ml b/src/analysis/locate.ml index 0d70b9db9..3e2c1f863 100644 --- a/src/analysis/locate.ml +++ b/src/analysis/locate.ml @@ -931,7 +931,14 @@ let from_path ~config ~env ~local_defs ~decl path = log ~title:"find_source" "Found file: %s (%a)" file Logger.fmt (Fun.flip Location.print_loc location); `Found { uid; decl_uid = decl.uid; file; location; approximated } - | `File_not_found _ as otherwise -> otherwise) + | `File_not_found reason -> + `File_not_found + { uid; + decl_uid = decl.uid; + file = reason; + location = loc; + approximated + }) let from_longident ~config ~env ~local_defs nss ident = let str_ident = @@ -1077,21 +1084,25 @@ let get_doc ~config:mconfig ~env ~local_defs ~comments ~pos = let from_path = from_path ~config ~env ~local_defs ~namespace path in begin match from_path with - | `Found { uid; location = loc; _ } -> doc_from_uid ~config ~loc uid - | (`Builtin _ | `Not_in_env _ | `File_not_found _ | `Not_found _) as - otherwise -> otherwise + | `Found { uid; location = loc; _ } + | `File_not_found { uid; location = loc; _ } -> + doc_from_uid ~config ~loc uid + | (`Builtin _ | `Not_in_env _ | `Not_found _) as otherwise -> + otherwise end | `User_input path -> log ~title:"get_doc" "looking for the doc of '%s'" path; begin match from_string ~config ~env ~local_defs ~pos path with - | `Found { uid; location = loc; _ } -> doc_from_uid ~config ~loc uid + | `Found { uid; location = loc; _ } + | `File_not_found { uid; location = loc; _ } -> + doc_from_uid ~config ~loc uid | `At_origin -> `Found_loc { Location.loc_start = pos; loc_end = pos; loc_ghost = true } | `Missing_labels_namespace -> `No_documentation - | (`Builtin _ | `Not_in_env _ | `Not_found _ | `File_not_found _) as - otherwise -> otherwise + | (`Builtin _ | `Not_in_env _ | `Not_found _) as otherwise -> + otherwise end in match doc_from_uid_result with @@ -1127,5 +1138,5 @@ let get_doc ~config:mconfig ~env ~local_defs ~comments ~pos = | `User_input path -> `Builtin path | `Completion_entry (_, path, _) -> `Builtin (Path.name path) end - | (`File_not_found _ | `Not_found _ | `No_documentation | `Not_in_env _) as - otherwise -> otherwise + | (`Not_found _ | `No_documentation | `Not_in_env _) as otherwise -> + otherwise diff --git a/src/analysis/locate.mli b/src/analysis/locate.mli index a1ae7ff0a..1824f54a3 100644 --- a/src/analysis/locate.mli +++ b/src/analysis/locate.mli @@ -72,7 +72,7 @@ val from_path : local_defs:Mtyper.typedtree -> namespace:Env_lookup.Namespace.t -> Path.t -> - [> `File_not_found of string + [> `File_not_found of result | `Found of result | `Builtin of Shape.Uid.t * string | `Not_in_env of string @@ -86,7 +86,7 @@ val from_string : ?let_pun_behavior:Mbrowse.Let_pun_behavior.t -> ?namespaces:Namespace_resolution.t -> string -> - [> `File_not_found of string + [> `File_not_found of result | `Found of result | `Builtin of Shape.Uid.t * string | `Missing_labels_namespace @@ -102,8 +102,7 @@ val get_doc : pos:Lexing.position -> [ `User_input of string | `Completion_entry of Env_lookup.Namespace.t * Path.t * Location.t ] -> - [> `File_not_found of string - | `Found of string + [> `Found of string | `Builtin of string | `Not_found of string * string option | `Not_in_env of string diff --git a/src/analysis/occurrences.ml b/src/analysis/occurrences.ml index ef97c4a69..289a2bf18 100644 --- a/src/analysis/occurrences.ml +++ b/src/analysis/occurrences.ml @@ -212,7 +212,8 @@ let locs_of ~config ~env ~typer_result ~pos ~scope path = | _ -> scope in (node_uid_loc, scope) - | `Found { uid; location; approximated = false; _ } -> + | `Found { uid; location; approximated = false; _ } + | `File_not_found { uid; location; approximated = false; _ } -> log ~title:"locs_of" "Found definition uid using locate: %a " Logger.fmt (fun fmt -> Shape.Uid.print fmt uid); (* There is no way to distinguish uids from interfaces from uids of @@ -222,7 +223,8 @@ let locs_of ~config ~env ~typer_result ~pos ~scope path = are actually linked. *) let scope = if is_in_interface config location then `Buffer else scope in (Some (uid, location), scope) - | `Found { decl_uid; location; approximated = true; _ } -> + | `Found { decl_uid; location; approximated = true; _ } + | `File_not_found { decl_uid; location; approximated = true; _ } -> log ~title:"locs_of" "Approx. definition: %a " Logger.fmt (fun fmt -> Shape.Uid.print fmt decl_uid); (Some (decl_uid, location), `Buffer) diff --git a/src/frontend/query_commands.ml b/src/frontend/query_commands.ml index 9d668eb91..17f6e8327 100644 --- a/src/frontend/query_commands.ml +++ b/src/frontend/query_commands.ml @@ -426,7 +426,7 @@ let dispatch pipeline (type a) : a Query_protocol.t -> a = function | `Not_in_env _ as s -> s | `Not_found _ as s -> s | `Found { file; location; _ } -> `Found (Some file, location.loc_start) - | `File_not_found _ as s -> s) + | `File_not_found { file = reason; _ } -> `File_not_found reason) end | Complete_prefix (prefix, pos, kinds, with_doc, with_types) -> let pipeline, typer = for_completion pipeline pos in @@ -611,8 +611,8 @@ let dispatch pipeline (type a) : a Query_protocol.t -> a = function | `Builtin (_, s) -> Locate.log ~title:"result" "found builtin %s" s; `Builtin s - | (`Not_found _ | `At_origin | `Not_in_env _ | `File_not_found _) as - otherwise -> + | `File_not_found { file = reason; _ } -> `File_not_found reason + | (`Not_found _ | `At_origin | `Not_in_env _) as otherwise -> Locate.log ~title:"result" "not found"; otherwise end diff --git a/tests/test-dirs/occurrences/no-def-mli-only.t b/tests/test-dirs/occurrences/no-def-mli-only.t new file mode 100644 index 000000000..348af344d --- /dev/null +++ b/tests/test-dirs/occurrences/no-def-mli-only.t @@ -0,0 +1,47 @@ + $ cat >noml.mli <<'EOF' + > type t = unit + > EOF + + $ cat >noml.ml <<'EOF' + > type t = unit + > EOF + + $ cat >main.ml <<'EOF' + > let x : Noml.t = () + > let y : Noml.t = () + > EOF + + $ $OCAMLC -c -bin-annot noml.mli noml.ml main.ml + +We remove the source file to mimick cases were generated source files are not +accessible to Merlin. + $ rm noml.ml + +We still expect occurrences of definitions in hidden source files to work + $ $MERLIN single occurrences -identifier-at 2:13 -filename main.ml Date: Wed, 9 Apr 2025 14:59:44 -0400 Subject: [PATCH 2/7] Downstream: Use new uid info to fix jumps and provide occurrences in both the interface and the implementation (ocaml/merlin #1857) --- CHANGES.md | 2 + src/analysis/locate.ml | 206 ++++++++++------ src/analysis/locate.mli | 21 +- src/analysis/occurrences.ml | 144 +++++------ src/frontend/query_commands.ml | 5 + src/ocaml-index/lib/index.ml | 11 +- src/ocaml-index/tests/tests-dirs/interfaces.t | 5 +- .../locate/context-detection/cd-test.t/run.t | 2 +- tests/test-dirs/locate/dune | 2 +- tests/test-dirs/locate/issue1848.t | 82 +++++++ .../occurrences/project-wide/mli-vs-ml.t | 230 +++++++++++++++++- 11 files changed, 548 insertions(+), 162 deletions(-) create mode 100644 tests/test-dirs/locate/issue1848.t diff --git a/CHANGES.md b/CHANGES.md index 666769b91..4688eb026 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -4,6 +4,8 @@ unreleased + merlin binary - Fix occurrences not working when the definition comes from a hidden source file (#1865) + - Use new 5.3 features to improve locate behavior in some cases. Merlin no + longer confuses uids from interfaces and implementations. (#1857) merlin 5.2 ========== diff --git a/src/analysis/locate.ml b/src/analysis/locate.ml index 3e2c1f863..13d688bd9 100644 --- a/src/analysis/locate.ml +++ b/src/analysis/locate.ml @@ -33,7 +33,10 @@ let last_location = ref Location.none let { Logger.log } = Logger.for_section "locate" type config = - { mconfig : Mconfig.t; ml_or_mli : [ `ML | `MLI ]; traverse_aliases : bool } + { mconfig : Mconfig.t; + ml_or_mli : [ `ML | `Smart | `MLI ]; + traverse_aliases : bool + } type result = { uid : Shape.Uid.t; @@ -77,6 +80,8 @@ module File : sig val explain_not_found : ?doc_from:string -> string -> t -> [> `File_not_found of string ] + + val is_source : t -> bool end = struct type t = | ML of string @@ -189,6 +194,10 @@ end = struct in `File_not_found msg + + let is_source = function + | ML _ | MLL _ | MLI _ -> true + | CMT _ | CMTI _ | CMS _ | CMSI _ -> false end module Artifact : sig @@ -198,14 +207,9 @@ module Artifact : sig val source_digest : t -> string option val comments : t -> (string * Location.t) list val impl_shape : t -> Shape.t option - val uid_to_loc : - loc_of_decl: - (uid:Shape.Uid.t -> - Typedtree.item_declaration -> - (Shape.Uid.t * Location.t) option) -> - Shape.Uid.t -> - t -> - Location.t option + val declaration_dependencies : + t -> (Cmt_format.dependency_kind * Shape.Uid.t * Shape.Uid.t) list + val uid_to_loc : Shape.Uid.t -> t -> string Location.loc option (** When we look for docstring in external compilation unit we can perform a uid-based search and return the attached comment in the attributes. @@ -240,15 +244,15 @@ end = struct let impl_shape = function | Cmt cmt_infos -> cmt_infos.cmt_impl_shape | Cms cms_infos -> cms_infos.cms_impl_shape + let declaration_dependencies = function + | Cmt cmt_infos -> cmt_infos.cmt_declaration_dependencies + | Cms cms_infos -> cms_infos.cms_declaration_dependencies - let uid_to_loc ~loc_of_decl uid = function + let uid_to_loc uid = function | Cmt cmt_infos -> Shape.Uid.Tbl.find_opt cmt_infos.cmt_uid_to_decl uid - |> Option.bind ~f:(loc_of_decl ~uid) - |> Option.map ~f:(fun (_, loc) -> loc) - | Cms cms_infos -> - Shape.Uid.Tbl.find_opt cms_infos.cms_uid_to_loc uid - |> Option.map ~f:(fun { Location.loc; _ } -> loc) + |> Option.bind ~f:(Typedtree_utils.location_of_declaration ~uid) + | Cms cms_infos -> Shape.Uid.Tbl.find_opt cms_infos.cms_uid_to_loc uid let find_doc_attribute attrs = let open Parsetree in @@ -339,7 +343,7 @@ end = struct end module Preferences : sig - val set : [ `ML | `MLI ] -> unit + val set : [ `ML | `Smart | `MLI ] -> unit val src : string -> File.t val build : string -> File.t @@ -351,7 +355,7 @@ end = struct let set choice = prioritize_impl := match choice with - | `ML -> true + | `ML | `Smart -> true | _ -> false let src file = if !prioritize_impl then File.ml file else File.mli file @@ -444,7 +448,15 @@ module Utils = struct List.dedup_adjacent files ~cmp:String.compare let find_file_with_path ~config ?(with_fallback = false) file path = - if File.name file = Mconfig.unitname config then + let title = "find_file_with_path" in + let filename = File.name file in + log ~title "Try find %S" filename; + if + File.is_source file + && filename = Mconfig.unitname config + then + (* No need to search when looking for the source of the current buffer's + compilation unit *) Some Mconfig.(config.query.filename) else let attempt_search src_suffix_pair = @@ -518,10 +530,12 @@ let move_to filename artifact = File_switching.move_to ~digest filename let load_cmt ~config ?with_fallback:(_ = true) comp_unit = + let title = "load_cmt" in Preferences.set config.ml_or_mli; let file = Preferences.build comp_unit in match Utils.find_file ~config:config.mconfig ~with_fallback:true file with | Some path -> + log ~title "Found %S at path %S" comp_unit path; let artifact = Artifact.read path in let source_file = Artifact.sourcefile artifact in let source_file = Option.value ~default:"*pack*" source_file in @@ -550,8 +564,8 @@ let scrape_alias ~env ~fallback_uid ~namespace path = when namespace = Shape.Sig_component_kind.Module_type -> (* This case is necessary to traverse module type aliases *) non_alias_declaration_uid ~fallback_uid alias_path - | _, md_uid -> md_uid - | exception Not_found -> fallback_uid + | _, md_uid -> (path, md_uid) + | exception Not_found -> (path, fallback_uid) in non_alias_declaration_uid ~fallback_uid path @@ -765,21 +779,36 @@ let find_source ~config loc path = doesn't know which is the right one: %s" matches) -(** [find_loc_of_uid] uid's location are given by tables stored int he cmt files - for external compilation units or computed by Merlin for the current buffer. - This function lookups a uid's location in the appropriate table. *) -let find_loc_of_uid ~config ~local_defs uid comp_unit = - let title = "find_loc_of_uid" in - let loc_of_decl ~uid def = - match Typedtree_utils.location_of_declaration ~uid def with - | Some loc -> - log ~title "Found location: %a" Logger.fmt (fun fmt -> - Location.print_loc fmt loc.loc); - Some (uid, loc.loc) - | None -> - log ~title "The declaration has no location."; - None +let rec unbox_uid = function + | Shape.Uid.Unboxed_version uid -> unbox_uid uid + | uid -> uid + +let lookup_uid_loc_of_decl ~config:mconfig uid = + let title = "lookup_uid_decl" in + let item = + match unbox_uid uid with + | Shape.Uid.Unboxed_version _ -> assert false + | Internal | Predef _ | Compilation_unit _ -> None + | Item { from = Intf; comp_unit; _ } -> Some (`MLI, comp_unit) + | Item { from = _; comp_unit; _ } -> Some (`ML, comp_unit) in + Option.bind item ~f:(fun (ml_or_mli, comp_unit) -> + let config = { mconfig; ml_or_mli; traverse_aliases = false } in + match load_cmt ~config comp_unit with + | Ok (_pos_fname, artifact) -> + log ~title "Cmt successfully loaded, looking for %a" Logger.fmt + (fun fmt -> Shape.Uid.print fmt uid); + Artifact.uid_to_loc uid artifact + | _ -> + log ~title "Failed to load the cmt file"; + None) + +(** uid's location are given by tables stored int he cmt files for external + compilation units or computed by Merlin for the current buffer. + [find_loc_of_uid] function lookups a uid's location in the appropriate + table. *) +let find_loc_of_item ~config ~local_defs uid comp_unit = + let title = "find_loc_of_uid" in if Misc_utils.is_current_unit comp_unit then begin log ~title "We look for %a in the current compilation unit." Logger.fmt (fun fmt -> Shape.Uid.print fmt uid); @@ -787,28 +816,12 @@ let find_loc_of_uid ~config ~local_defs uid comp_unit = Shape.Uid.print fmt uid); let tbl = Ast_iterators.build_uid_to_locs_tbl ~local_defs () in match Shape.Uid.Tbl.find_opt tbl uid with - | Some { Location.loc; _ } -> `Some (uid, loc) + | Some loc -> Some loc | None -> log ~title "Uid not found in the local table."; - `None - end - else begin - log ~title "Loading the cmt file for unit %S" comp_unit; - match load_cmt ~config comp_unit with - | Ok (_pos_fname, artifact) -> - log ~title "Shapes successfully loaded, looking for %a" Logger.fmt - (fun fmt -> Shape.Uid.print fmt uid); - begin - match Artifact.uid_to_loc ~loc_of_decl uid artifact with - | Some decl -> `Some (uid, decl) - | None -> - log ~title "Uid not found in the cmt's table."; - `None - end - | _ -> - log ~title "Failed to load the cmt file"; - `None + None end + else lookup_uid_loc_of_decl ~config:config.mconfig uid let find_loc_of_comp_unit ~config uid comp_unit = let title = "find_loc_of_comp_unit" in @@ -822,6 +835,46 @@ let find_loc_of_comp_unit ~config uid comp_unit = log ~title "Failed to load the CU's cmt"; `None +let find_loc_of_uid ~config ~local_defs ?ident ?fallback (uid : Shape.Uid.t) = + let find_loc_of_item ~comp_unit = + match + (find_loc_of_item ~config ~local_defs uid comp_unit, fallback, ident) + with + | Some { loc; txt }, _, Some ident when String.equal txt ident -> + (* Checking the ident prevent returning nonsensical results when some uid + were swaped but the cmt files were not rebuilt. *) + Some (uid, loc) + | Some { loc; _ }, _, None -> Some (uid, loc) + | (Some _ | None), Some fallback, _ -> + find_loc_of_item ~config ~local_defs fallback comp_unit + |> Option.map ~f:(fun { Location.loc; _ } -> (fallback, loc)) + | _ -> None + in + match unbox_uid uid with + | Unboxed_version _ -> assert false + | Predef s -> `Builtin (uid, s) + | Internal -> `Builtin (uid, "") + | Item { comp_unit; _ } -> `Opt (find_loc_of_item ~comp_unit) + | Compilation_unit comp_unit -> find_loc_of_comp_unit ~config uid comp_unit + +let get_linked_uids ~config ~comp_unit decl_uid = + let title = "linked_uids" in + log ~title "Try find cmt file for %s" comp_unit; + match load_cmt ~config comp_unit with + | Ok (_pos_fname, artifact) -> + log ~title "Cmt successfully loaded, looking for %a" Logger.fmt (fun fmt -> + Shape.Uid.print fmt decl_uid); + List.filter_map ~f:(function + | Cmt_format.Definition_to_declaration, def, decl when decl = decl_uid -> + Some def + | Cmt_format.Definition_to_declaration, def, decl when def = decl_uid -> + Some decl + | _ -> None) + @@ Artifact.declaration_dependencies artifact + | _ -> + log ~title "Failed to load the cmt file"; + [] + let find_definition_uid ~config ~env ~(decl : Env_lookup.item) path = let namespace = decl.namespace in let module Reduce = Shape_reduce.Make (struct @@ -872,24 +925,27 @@ let rec uid_of_result ~traverse_aliases = function let from_path ~config ~env ~local_defs ~decl path = let title = "from_path" in let unalias (decl : Env_lookup.item) = - if not config.traverse_aliases then decl.uid + if not config.traverse_aliases then (path, decl.uid) else let namespace = decl.namespace in - let uid = scrape_alias ~fallback_uid:decl.uid ~env ~namespace path in + let path, uid = + scrape_alias ~fallback_uid:decl.uid ~env ~namespace path + in if uid <> decl.uid then log ~title:"uid_of_path" "Unaliased declaration uid: %a -> %a" Logger.fmt (Fun.flip Shape.Uid.print decl.uid) Logger.fmt (Fun.flip Shape.Uid.print uid); - uid + (path, uid) in (* Step 1: Path => Uid *) - let decl : Env_lookup.item = { decl with uid = unalias decl } in + let path, uid = unalias decl in + let decl : Env_lookup.item = { decl with uid } in let uid, approximated = match config.ml_or_mli with | `MLI -> (decl.uid, false) - | `ML -> ( + | `ML | `Smart -> ( let traverse_aliases = config.traverse_aliases in let result = find_definition_uid ~config ~env ~decl path in match uid_of_result ~traverse_aliases result with @@ -900,30 +956,36 @@ let from_path ~config ~env ~local_defs ~decl path = (Fun.flip Shape.Uid.print decl.uid); (decl.uid, true)) in + (* Step 1': Try refine Uid *) + let impl_uid = + (* When looking for a definition but stuck on an interface we load the + corresponding cmt file to try to find a corresponding definition. *) + match (uid, config.ml_or_mli) with + | Item { from = Intf; comp_unit; _ }, `Smart -> ( + match get_linked_uids ~config ~comp_unit uid with + | [ uid ] -> Some uid + | _ -> None) + | _ -> None + in (* Step 2: Uid => Location *) let loc = - let rec get_location : Shape.Uid.t -> _ = function - | Predef s -> `Builtin (uid, s) - | Internal -> `Builtin (uid, "") - | Item { comp_unit; _ } -> - find_loc_of_uid ~config ~local_defs uid comp_unit - | Compilation_unit comp_unit -> - find_loc_of_comp_unit ~config uid comp_unit - | Unboxed_version uid -> get_location uid - in - get_location uid + match impl_uid with + | Some impl_uid -> + let ident = Path.last path in + find_loc_of_uid ~config ~local_defs ~ident ~fallback:uid impl_uid + | None -> find_loc_of_uid ~config ~local_defs uid in let loc = match loc with - | `None -> + | `None | `Opt None -> log ~title "Falling back to the declaration's location: %a" Logger.fmt (Fun.flip Location.print_loc decl.loc); `Some (decl.uid, decl.loc) - | other -> other + | `Opt (Some result) -> `Some result + | (`Builtin _ | `Some _) as other -> other in (* Step 3: Location => Source *) match loc with - | `None -> assert false | `Builtin _ as err -> err | `Some (uid, loc) -> ( match find_source ~config:config.mconfig loc (Path.name path) with @@ -1007,7 +1069,7 @@ let from_string ~config ~env ~local_defs ~pos ?let_pun_behavior log ~title:"from_string" "looking for the source of '%s' (prioritizing %s files)" path (match config.ml_or_mli with - | `ML -> ".ml" + | `ML | `Smart -> ".ml" | `MLI -> ".mli"); from_longident ~config ~env ~local_defs nss ident in diff --git a/src/analysis/locate.mli b/src/analysis/locate.mli index 1824f54a3..804dba7e2 100644 --- a/src/analysis/locate.mli +++ b/src/analysis/locate.mli @@ -37,7 +37,13 @@ end val log : 'a Logger.printf type config = - { mconfig : Mconfig.t; ml_or_mli : [ `ML | `MLI ]; traverse_aliases : bool } + { mconfig : Mconfig.t; + ml_or_mli : [ `ML | `Smart | `MLI ]; + (** When [ml_or_mli] is [`Smart], if locate blocks on an interface uid, + it will use the [cmt_declaration_dependencies] to try finding a + unique corresponding definition in the implementation. *) + traverse_aliases : bool + } type result = { uid : Shape.Uid.t; @@ -60,6 +66,19 @@ end val uid_of_result : traverse_aliases:bool -> Shape_reduce.result -> Shape.Uid.t option * bool +(* val lookup_uid_decl : + config:Mconfig.t -> Shape.Uid.t -> Typedtree.item_declaration option *) + +(** Lookup the declaration of the given Uid in the appropriate cmt file *) +val lookup_uid_loc_of_decl : + config:Mconfig.t -> Shape.Uid.t -> string Location.loc option + +(** [get_linked_uids] queries the [cmt_declaration_dependencies] table and + returns udis related to the one passed as argument. TODO right now this + function only returns simple links tagged with [Definition_to_declaration] *) +val get_linked_uids : + config:config -> comp_unit:string -> Shape.Uid.t -> Shape.Uid.t list + val find_source : config:Mconfig.t -> Warnings.loc -> diff --git a/src/analysis/occurrences.ml b/src/analysis/occurrences.ml index 289a2bf18..6dca8d993 100644 --- a/src/analysis/occurrences.ml +++ b/src/analysis/occurrences.ml @@ -177,10 +177,73 @@ let get_buffer_locs result uid = if Shape.Uid.equal uid uid' then Lid_set.add loc acc else acc) (Mtyper.get_index result) Lid_set.empty -let is_in_interface (config : Mconfig.t) (loc : Warnings.loc) = - let extension = Filename.extension loc.loc_start.pos_fname in - List.exists config.merlin.suffixes ~f:(fun (_impl, intf) -> - String.equal extension intf) +let get_external_locs ~(config : Mconfig.t) ~current_buffer_path uid = + let title = "get_external_locs" in + List.filter_map config.merlin.index_files ~f:(fun file -> + log ~title "Lookin for occurrences of %a in index %s" Logger.fmt + (Fun.flip Shape.Uid.print uid) + file; + let external_locs = + try + let external_index = Index_cache.read file in + Index_format.Uid_map.find_opt uid external_index.defs + |> Option.map ~f:(fun uid_locs -> (external_index, uid_locs)) + with Index_format.Not_an_index _ | Sys_error _ -> + log ~title:"external_index" "Could not load index %s" file; + None + in + Option.map external_locs ~f:(fun (index, locs) -> + let stats = Stat_check.create ~cache_size:128 index in + ( Lid_set.filter + (fun ({ loc; _ } as lid) -> + let is_current_buffer = + (* We filter external results that concern the current buffer *) + let file = loc.Location.loc_start.Lexing.pos_fname in + let file, buf = + match config.merlin.source_root with + | Some root -> + (Filename.concat root file, current_buffer_path) + | None -> (file, config.query.filename) + in + let file = Misc.canonicalize_filename file in + let buf = Misc.canonicalize_filename buf in + String.equal file buf + in + let should_be_ignored = + (* We ignore results that don't have a location *) + Index_occurrences.should_ignore_lid lid + in + if is_current_buffer || should_be_ignored then false + else begin + (* We ignore external results if their source was modified *) + let check = Stat_check.check stats ~file in + if not check then + log ~title:"locs_of" "File %s might be out-of-sync." file; + check + end) + locs, + Stat_check.get_outdated_files stats ))) + +let find_linked_uids ~config ~name uid = + let title = "find_linked_uids" in + match uid with + | Shape.Uid.Item { from = _; comp_unit; _ } -> ( + let config = + { Locate.mconfig = config; ml_or_mli = `ML; traverse_aliases = false } + in + match Locate.get_linked_uids ~config ~comp_unit uid with + | [ uid' ] -> + log ~title "Found linked uid: %a" Logger.fmt (fun fmt -> + Shape.Uid.print fmt uid'); + let name_check = + Locate.lookup_uid_loc_of_decl ~config:config.mconfig uid' + |> Option.value_map + ~f:(fun { Location.txt; _ } -> String.equal name txt) + ~default:false + in + if name_check then [ uid' ] else [] + | _ -> []) + | _ -> [] let locs_of ~config ~env ~typer_result ~pos ~scope path = log ~title:"occurrences" "Looking for occurences of %s (pos: %s)" path @@ -200,28 +263,11 @@ let locs_of ~config ~env ~typer_result ~pos ~scope path = let browse = Mbrowse.of_typedtree local_defs in let env, node = Mbrowse.leaf_node (Mbrowse.enclosing pos [ browse ]) in let node_uid_loc = uid_and_loc_of_node env node in - let scope = - match node_uid_loc with - | Some (_, l) when is_in_interface config l -> - (* There is no way to distinguish uids from interfaces from uids of - implementations. We fallback on buffer occurrences in that case. - TODO: we should be able to improve on that situation when we will be - able to distinguish between impl/intf uids and know which declaration - are actually linked. *) - `Buffer - | _ -> scope - in (node_uid_loc, scope) | `Found { uid; location; approximated = false; _ } | `File_not_found { uid; location; approximated = false; _ } -> log ~title:"locs_of" "Found definition uid using locate: %a " Logger.fmt (fun fmt -> Shape.Uid.print fmt uid); - (* There is no way to distinguish uids from interfaces from uids of - implementations. We fallback on buffer occurrences in that case. - TODO: we should be able to improve on that situation when we will be - able to distinguish between impl/intf uids and know which declaration - are actually linked. *) - let scope = if is_in_interface config location then `Buffer else scope in (Some (uid, location), scope) | `Found { decl_uid; location; approximated = true; _ } | `File_not_found { decl_uid; location; approximated = true; _ } -> @@ -252,55 +298,13 @@ let locs_of ~config ~env ~typer_result ~pos ~scope path = let external_occurrences = if scope = `Buffer then [] else - List.filter_map config.merlin.index_files ~f:(fun index_file -> - let external_locs = - try - let external_index = Index_cache.read index_file in - Index_format.Uid_map.find_opt def_uid external_index.defs - |> Option.map ~f:(fun uid_locs -> (external_index, uid_locs)) - with Index_format.Not_an_index _ | Sys_error _ -> - log ~title:"external_index" "Could not load index %s" index_file; - None - in - Option.map external_locs ~f:(fun (index, locs) -> - let stats = Stat_check.create ~cache_size:128 index in - ( Occurrence_set.of_filtered_lid_set locs - ~f:(fun ({ loc; _ } as lid) -> - (* We filter external results that concern the current buffer *) - let file_rel_to_root = - loc.Location.loc_start.Lexing.pos_fname - in - let file_uncanon, buf_uncanon = - match config.merlin.source_root with - | Some root -> - ( Filename.concat root file_rel_to_root, - current_buffer_path ) - | None -> (file_rel_to_root, config.query.filename) - in - let file = Misc.canonicalize_filename file_uncanon in - let buf = Misc.canonicalize_filename buf_uncanon in - let is_current_buffer = String.equal file buf in - let should_be_ignored = - (* We ignore results that don't have a location *) - Index_occurrences.should_ignore_lid lid - in - if is_current_buffer || should_be_ignored then None - else begin - (* We ignore external results if their source was modified *) - let is_fresh = - Stat_check.check stats ~file:file_rel_to_root - in - if not is_fresh then - log ~title:"locs_of" "File %s might be out-of-sync." - file; - let staleness : Staleness.t = - match is_fresh with - | true -> Fresh - | false -> Stale - in - Some staleness - end), - Stat_check.get_outdated_files stats ))) + let name = + String.split_on_char ~sep:'.' path |> List.last |> Option.get + in + let additional_uids = find_linked_uids ~config ~name def_uid in + List.concat_map + (def_uid :: additional_uids) + ~f:(get_external_locs ~config ~current_buffer_path) in let external_occurrences, out_of_sync_files = List.fold_left diff --git a/src/frontend/query_commands.ml b/src/frontend/query_commands.ml index 17f6e8327..dc44ce672 100644 --- a/src/frontend/query_commands.ml +++ b/src/frontend/query_commands.ml @@ -586,6 +586,11 @@ let dispatch pipeline (type a) : a Query_protocol.t -> a = function in if path = "" then `Invalid_context else + let ml_or_mli = + match ml_or_mli with + | `ML -> `Smart + | `MLI -> `MLI + in let config = Locate. { mconfig = Mpipeline.final_config pipeline; diff --git a/src/ocaml-index/lib/index.ml b/src/ocaml-index/lib/index.ml index e95623630..62cd855df 100644 --- a/src/ocaml-index/lib/index.ml +++ b/src/ocaml-index/lib/index.ml @@ -103,10 +103,8 @@ let index_of_artifact ~root ~rewrite_root ~build_path ~do_not_use_cmt_loadpath let shapes = shapes end)) in let defs = - if Option.is_none cmt_impl_shape then Shape.Uid.Map.empty - else - gather_locs_from_fragments ~root ~rewrite_root Shape.Uid.Map.empty - uid_to_loc + gather_locs_from_fragments ~root ~rewrite_root Shape.Uid.Map.empty + uid_to_loc in (* The list [cmt_ident_occurrences] associate each ident usage location in the module with its (partially) reduced shape. We finish the reduction and @@ -118,11 +116,6 @@ let index_of_artifact ~root ~rewrite_root ~build_path ~do_not_use_cmt_loadpath let resolved = match item with | Unresolved shape -> Reduce.reduce_for_uid cmt_initial_env shape - | Resolved _ when Option.is_none cmt_impl_shape -> - (* Right now, without additional information we cannot take the - risk to mix uids from interfaces with the ones from - implementations. We simply ignore items defined in an interface. *) - Internal_error_missing_uid | result -> result in match Locate.uid_of_result ~traverse_aliases:false resolved with diff --git a/src/ocaml-index/tests/tests-dirs/interfaces.t b/src/ocaml-index/tests/tests-dirs/interfaces.t index 7d1176511..c72eda193 100644 --- a/src/ocaml-index/tests/tests-dirs/interfaces.t +++ b/src/ocaml-index/tests/tests-dirs/interfaces.t @@ -17,7 +17,8 @@ $ ocaml-index aggregate main.cmti -o main.index $ ocaml-index dump main.index - 1 uids: - {uid: Stdlib__Float.81; locs: + 2 uids: + {uid: [intf]Main.0; locs: "t": File "main.mli", line 1, characters 5-6 + uid: Stdlib__Float.81; locs: "Float.t": File "main.mli", line 1, characters 9-16 }, 0 approx shapes: {}, and shapes for CUS . diff --git a/tests/test-dirs/locate/context-detection/cd-test.t/run.t b/tests/test-dirs/locate/context-detection/cd-test.t/run.t index 3e0131483..867e37d01 100644 --- a/tests/test-dirs/locate/context-detection/cd-test.t/run.t +++ b/tests/test-dirs/locate/context-detection/cd-test.t/run.t @@ -32,7 +32,7 @@ Trying them all: "value": { "file": "$TESTCASE_ROOT/test.ml", "pos": { - "line": 7, + "line": 3, "col": 12 } }, diff --git a/tests/test-dirs/locate/dune b/tests/test-dirs/locate/dune index 780852851..2fed16258 100755 --- a/tests/test-dirs/locate/dune +++ b/tests/test-dirs/locate/dune @@ -1,6 +1,6 @@ (cram (applies_to looping-substitution mutually-recursive partial-cmt includes - issue802 issue845 issue1199 issue1524 sig-substs l-413-features + issue802 issue845 issue1848 issue1199 issue1524 sig-substs l-413-features module-aliases locate-constrs without-implem without-sig module-decl-aliases in-implicit-trans-dep distinguish-files) (enabled_if diff --git a/tests/test-dirs/locate/issue1848.t b/tests/test-dirs/locate/issue1848.t new file mode 100644 index 000000000..a196bc3f2 --- /dev/null +++ b/tests/test-dirs/locate/issue1848.t @@ -0,0 +1,82 @@ +Create a module with an mli file + $ cat > foo.ml << EOF + > type t = Foo + > module Bar = struct + > type t = Bar + > end + > EOF + + $ cat > foo.mli << EOF + > module Bar : sig + > type t + > end + > type t + > EOF + + $ $OCAMLC -c -bin-annot foo.mli + $ $OCAMLC -c -bin-annot foo.ml + +Locate the Bar on line 4 + $ cat > test1.ml << EOF + > module type Foo = sig + > include module type of Foo + > module Bar : sig + > include module type of Bar + > end + > end + > EOF + + + $ $MERLIN single locate -position 4:28 -look-for mli \ + > -filename test1.ml < test1.ml | jq .value + { + "file": "$TESTCASE_ROOT/foo.mli", + "pos": { + "line": 1, + "col": 7 + } + } + +Module Bar in foo.mli is a correct answer, but since there is only +one corresponding implementation we can jump there instead. + $ $MERLIN single locate -position 4:28 -look-for ml \ + > -filename test1.ml < test1.ml | jq .value + { + "file": "$TESTCASE_ROOT/foo.ml", + "pos": { + "line": 2, + "col": 7 + } + } + +Locate the Bar on line 3 + $ cat > test2.ml << EOF + > include Foo + > module Bar = struct + > include Bar + > end + > EOF + +Correctly returns 2:7 + $ $MERLIN single locate -position 3:12 -look-for ml -filename test2.ml < test2.ml | jq .value + { + "file": "$TESTCASE_ROOT/foo.ml", + "pos": { + "line": 2, + "col": 7 + } + } + +Locate the Foo.Bar on line 1 + $ cat > test3.ml << EOF + > include module type of Foo.Bar + > EOF +Correctly returns 2:7 + $ $MERLIN single locate -position 1:28 -look-for ml -filename test3.ml < test3.ml | jq .value + { + "file": "$TESTCASE_ROOT/foo.ml", + "pos": { + "line": 2, + "col": 7 + } + } diff --git a/tests/test-dirs/occurrences/project-wide/mli-vs-ml.t b/tests/test-dirs/occurrences/project-wide/mli-vs-ml.t index f09ebcfcd..ad6499425 100644 --- a/tests/test-dirs/occurrences/project-wide/mli-vs-ml.t +++ b/tests/test-dirs/occurrences/project-wide/mli-vs-ml.t @@ -6,6 +6,7 @@ $ cat >main.ml <<'EOF' > let x = () > type t = unit + > let _ : t = () > EOF $ $OCAMLC -bin-annot -bin-annot-occurrences -c main.mli main.ml @@ -14,12 +15,19 @@ The indexer should not mixup uids from mli and ml files: $ ocaml-index dump project.ocaml-index - 2 uids: - {uid: Main.0; locs: "x": File "main.ml", line 1, characters 4-5 - uid: Main.1; locs: "t": File "main.ml", line 2, characters 5-6 }, - 0 approx shapes: {}, and shapes for CUS . + 4 uids: + {uid: [intf]Main.0; locs: + "t": File "main.mli", line 1, characters 5-6; + "t": File "main.mli", line 2, characters 8-9 + uid: Main.0; locs: "x": File "main.ml", line 1, characters 4-5 + uid: [intf]Main.1; locs: "x": File "main.mli", line 2, characters 4-5 + uid: Main.1; locs: + "t": File "main.ml", line 2, characters 5-6; + "t": File "main.ml", line 3, characters 8-9 + }, 0 approx shapes: {}, and shapes for CUS . -Merlin should not mixup uids from mli and ml files: +Merlin should not mixup uids from mli and ml files, and return results in both +the interface and the implementation. $ $MERLIN single occurrences -scope project -identifier-at 2:8 \ > -index-file project.ocaml-index \ > -filename main.mli -index-file project.ocaml-index \ + > -index-file project.ocaml-index \ > -filename main.mli -index-file project.ocaml-index \ + > -filename main.ml -index-file project.ocaml-index \ + > -filename main.ml -index-file project.ocaml-index \ + > -filename main.mli main.ml <<'EOF' + > type t = unit + > let x = () + > let _ : t = () + > EOF + +Merlin should not get confused and return an occurrence of `x` in the interface +when asked from occurrences of `t` in the implementation. + +FIXME: this is based on a heuristic that compares the identifiers it could still +get confused if both identifers are the same. + $ $MERLIN single occurrences -scope project -identifier-at 1:5 \ + > -index-file project.ocaml-index \ + > -filename main.ml Date: Wed, 9 Apr 2025 15:17:27 -0400 Subject: [PATCH 3/7] Downstream: perform less merges when indexing (ocaml/merlin#1881) Immediately grow the final index instead of building and merging. --- CHANGES.md | 1 + src/ocaml-index/lib/index.ml | 75 +++++++++++++++++++----------------- 2 files changed, 41 insertions(+), 35 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 4688eb026..cdbe5f2b7 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -6,6 +6,7 @@ unreleased file (#1865) - Use new 5.3 features to improve locate behavior in some cases. Merlin no longer confuses uids from interfaces and implementations. (#1857) + - Perform less merges in the indexer (#1881) merlin 5.2 ========== diff --git a/src/ocaml-index/lib/index.ml b/src/ocaml-index/lib/index.ml index 62cd855df..29213a371 100644 --- a/src/ocaml-index/lib/index.ml +++ b/src/ocaml-index/lib/index.ml @@ -24,6 +24,13 @@ let merge m m' = (fun _uid locs locs' -> Some (Lid_set.union locs locs')) m m' +let add_one uid lid map = + Shape.Uid.Map.update uid + (function + | None -> Some (Lid_set.singleton lid) + | Some set -> Some (Lid_set.add lid set)) + map + (** Cmt files contains a table of declarations' Uids associated to a typedtree fragment. [add_locs_from_fragments] gather locations from these *) let gather_locs_from_fragments ~root ~rewrite_root map fragments = @@ -36,7 +43,7 @@ let gather_locs_from_fragments ~root ~rewrite_root map fragments = | Some lid -> let lid = to_located_lid lid in let lid = if rewrite_root then add_root ~root lid else lid in - Shape.Uid.Map.add uid (Lid_set.singleton lid) acc + add_one uid lid acc in Shape.Uid.Tbl.fold add_loc fragments map @@ -95,16 +102,16 @@ let init_load_path_once ~do_not_use_cmt_loadpath = Load_path.(init ~auto_include:no_auto_include ~visible ~hidden); loaded := true) -let index_of_artifact ~root ~rewrite_root ~build_path ~do_not_use_cmt_loadpath - ~shapes ~cmt_loadpath ~cmt_impl_shape ~cmt_modname ~uid_to_loc - ~cmt_ident_occurrences ~cmt_initial_env ~cmt_sourcefile ~cmt_source_digest = +let index_of_artifact ~into ~root ~rewrite_root ~build_path + ~do_not_use_cmt_loadpath ~shapes ~cmt_loadpath ~cmt_impl_shape ~cmt_modname + ~uid_to_loc ~cmt_ident_occurrences ~cmt_initial_env ~cmt_sourcefile + ~cmt_source_digest = init_load_path_once ~do_not_use_cmt_loadpath ~dirs:build_path cmt_loadpath; let module Reduce = Shape_reduce.Make (Reduce_conf (struct let shapes = shapes end)) in let defs = - gather_locs_from_fragments ~root ~rewrite_root Shape.Uid.Map.empty - uid_to_loc + gather_locs_from_fragments ~root ~rewrite_root into.defs uid_to_loc in (* The list [cmt_ident_occurrences] associate each ident usage location in the module with its (partially) reduced shape. We finish the reduction and @@ -119,30 +126,30 @@ let index_of_artifact ~root ~rewrite_root ~build_path ~do_not_use_cmt_loadpath | result -> result in match Locate.uid_of_result ~traverse_aliases:false resolved with - | Some uid, false -> (add acc_defs uid (Lid_set.singleton lid), acc_apx) - | Some uid, true -> (acc_defs, add acc_apx uid (Lid_set.singleton lid)) + | Some uid, false -> (add_one uid lid acc_defs, acc_apx) + | Some uid, true -> (acc_defs, add_one uid lid acc_apx) | None, _ -> acc) - (defs, Shape.Uid.Map.empty) - cmt_ident_occurrences + (defs, into.approximated) cmt_ident_occurrences in - let cu_shape = Hashtbl.create 1 in + let cu_shape = into.cu_shape in Option.iter (Hashtbl.add cu_shape cmt_modname) cmt_impl_shape; let stats = match cmt_sourcefile with - | None -> Stats.empty + | None -> into.stats | Some src -> ( let rooted_src = with_root ?root src in try let stats = Unix.stat rooted_src in let src = if rewrite_root then rooted_src else src in - Stats.singleton src + Stats.add src { mtime = stats.st_mtime; size = stats.st_size; source_digest = cmt_source_digest } - with Unix.Unix_error _ -> Stats.empty) + into.stats + with Unix.Unix_error _ -> into.stats) in - { defs; approximated; cu_shape; stats; root_directory = None } + { defs; approximated; cu_shape; stats; root_directory = into.root_directory } let shape_of_artifact ~impl_shape ~modname = let cu_shape = Hashtbl.create 1 in @@ -232,27 +239,25 @@ let from_files ~store_shapes ~output_file ~root ~rewrite_root ~build_path List.fold_left (fun into file -> Log.debug "Indexing from file: %s" file; - let index = - match Cms_cache.read file with - | cms_item -> - index_of_cms ~root ~rewrite_root ~build_path - ~do_not_use_cmt_loadpath ~shapes:into.cu_shape cms_item.cms_infos + match Cms_cache.read file with + | cms_item -> + index_of_cms ~into ~root ~rewrite_root ~build_path + ~do_not_use_cmt_loadpath ~shapes:into.cu_shape cms_item.cms_infos + | exception _ -> ( + match Cmt_cache.read file with + | cmt_item -> + index_of_cmt ~into ~root ~rewrite_root ~build_path + ~do_not_use_cmt_loadpath ~shapes:into.cu_shape cmt_item.cmt_infos | exception _ -> ( - match Cmt_cache.read file with - | cmt_item -> - index_of_cmt ~root ~rewrite_root ~build_path - ~do_not_use_cmt_loadpath ~shapes:into.cu_shape - cmt_item.cmt_infos - | exception _ -> ( - match read ~file with - | Index index -> index - | _ -> - Log.error "Unknown file type: %s" file; - exit 1)) - in - (* We add the shapes into `into` because we need to collect them so we can use - them for shape reduction, regardless of whether store_shapes is true *) - merge_index ~store_shapes:true index ~into) + match read ~file with + | Index index -> + (* We add the shapes into `into` because we need to collect them so we can use + them for shape reduction, regardless of whether store_shapes + is true *) + merge_index ~store_shapes:true index ~into + | _ -> + Log.error "Unknown file type: %s" file; + exit 1))) initial_index files in let final_index = From d57395e6e9f8c95ca4438e981c7b44a71499b173 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ulysse=20G=C3=A9rard?= Date: Wed, 9 Apr 2025 16:39:20 -0400 Subject: [PATCH 4/7] Merge project-wide renaming changes from (ocaml/merlin#1877) --- CHANGES.md | 2 + doc/dev/PROTOCOL.md | 6 +- src/analysis/occurrences.ml | 163 ++++++++++-------- src/analysis/occurrences.mli | 2 +- src/commands/new_commands.ml | 1 + src/commands/query_json.ml | 12 +- src/frontend/query_protocol.ml | 4 +- src/index-format/index_format.ml | 27 ++- src/index-format/index_format.mli | 4 +- src/index-format/union_find.ml | 40 +++++ src/ocaml-index/lib/index.ml | 44 ++++- .../tests/tests-dirs/demonstate-cms-deps.t | 5 + .../tests/tests-dirs/gather-shapes.t | 2 + .../tests/tests-dirs/index-project.t | 3 + src/ocaml-index/tests/tests-dirs/interfaces.t | 1 + .../tests-dirs/local-shape-and-include.t | 2 + .../tests/tests-dirs/transitive-deps-cms.t | 3 + .../tests/tests-dirs/transitive-deps.t | 3 + .../for-renaming/r-modules-and-types.t | 90 ++++++++++ .../for-renaming/r-with-functors.t/dune | 2 + .../r-with-functors.t/dune-project | 1 + .../for-renaming/r-with-functors.t/func.ml | 5 + .../for-renaming/r-with-functors.t/func.mli | 5 + .../for-renaming/r-with-functors.t/main.ml | 4 + .../for-renaming/r-with-functors.t/run.t | 34 ++++ .../occurrences/project-wide/mli-vs-ml.t | 1 + .../occurrences/project-wide/prefix.t/run.t | 1 + .../occurrences/project-wide/pwo-basic.t | 1 + .../occurrences/project-wide/pwo-ml-gen.t | 2 + .../occurrences/project-wide/pwo-relative.t | 1 + .../server-tests/pwo-uid-stability.t | 3 + 31 files changed, 383 insertions(+), 91 deletions(-) create mode 100644 src/index-format/union_find.ml create mode 100644 tests/test-dirs/occurrences/project-wide/for-renaming/r-modules-and-types.t create mode 100644 tests/test-dirs/occurrences/project-wide/for-renaming/r-with-functors.t/dune create mode 100644 tests/test-dirs/occurrences/project-wide/for-renaming/r-with-functors.t/dune-project create mode 100644 tests/test-dirs/occurrences/project-wide/for-renaming/r-with-functors.t/func.ml create mode 100644 tests/test-dirs/occurrences/project-wide/for-renaming/r-with-functors.t/func.mli create mode 100644 tests/test-dirs/occurrences/project-wide/for-renaming/r-with-functors.t/main.ml create mode 100644 tests/test-dirs/occurrences/project-wide/for-renaming/r-with-functors.t/run.t diff --git a/CHANGES.md b/CHANGES.md index cdbe5f2b7..a7988d1b2 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -7,6 +7,8 @@ unreleased - Use new 5.3 features to improve locate behavior in some cases. Merlin no longer confuses uids from interfaces and implementations. (#1857) - Perform less merges in the indexer (#1881) + - Add initial support for project-wide renaming: occurrences can now return + all usages of all related definitions. (#1877) merlin 5.2 ========== diff --git a/doc/dev/PROTOCOL.md b/doc/dev/PROTOCOL.md index 7e84bcd7e..a6b86a0ef 100644 --- a/doc/dev/PROTOCOL.md +++ b/doc/dev/PROTOCOL.md @@ -355,9 +355,13 @@ Returns either: Returns a list of locations `{'start': position, 'end': position}` of all occurrences in current buffer of the entity at the specified position. If scope -is set to `project` the returned locations will also contain a field `file`: +is set to `project` or `renaming`‡ the returned locations will also contain a field `file`: `{'file': string, 'start': position, 'end': position}`. +When the scope is set to `renaming`, all usages of all the related definitions +corresponding to an identifier will be returned. When scope is `project` only +the usages of the current definition will be returned. + ### `outline` diff --git a/src/analysis/occurrences.ml b/src/analysis/occurrences.ml index 6dca8d993..79062e2ae 100644 --- a/src/analysis/occurrences.ml +++ b/src/analysis/occurrences.ml @@ -71,7 +71,7 @@ let set_fname ~file (loc : Location.t) = correspond to a ppx extension node.) In such a case, attempting to modify the location to only include the last segment of the identifier is nonsensical. Since we don't have a way to detect such a case, it forces us to not try. *) -(* + (* A longident can have the form: A.B.x Right now we are only interested in values, but we will eventually want to index all occurrences of modules in such longidents. However there is an issue with that: we only have the @@ -83,20 +83,25 @@ let set_fname ~file (loc : Location.t) = when the ident does not require parenthesis. In that case the loc sie differs from the name size in a way that depends on the concrete syntax which is lost. *) -let last_loc (loc : Location.t) lid = - match lid with - | Longident.Lident _ -> loc - | _ -> - let last_segment = Longident.last lid in - let needs_parens = Pprintast.needs_parens last_segment in - if not needs_parens then - let last_size = last_segment |> String.length in - { loc with - loc_start = - { loc.loc_end with pos_cnum = loc.loc_end.pos_cnum - last_size } - } - else loc -*) +let last_loc_for_renaming (loc : Location.t) lid = + (* Merlin-jst: we do want to keep some of these ghost locs in cases like + punning, but not in the case of ppxes, the disctinction is not obvious to + make. *) + if loc.loc_ghost then None + else + Some + (match lid with + | Longident.Lident _ -> loc + | _ -> + let last_segment = Longident.last lid in + let needs_parens = Pprintast.needs_parens last_segment in + if not needs_parens then + let last_size = last_segment |> String.length in + { loc with + loc_start = + { loc.loc_end with pos_cnum = loc.loc_end.pos_cnum - last_size } + } + else loc) let uid_and_loc_of_node env node = let open Browse_raw in @@ -224,25 +229,53 @@ let get_external_locs ~(config : Mconfig.t) ~current_buffer_path uid = locs, Stat_check.get_outdated_files stats ))) -let find_linked_uids ~config ~name uid = +let lookup_related_uids_in_indexes ~(config : Mconfig.t) uid = + let title = "lookup_related_uids_in_indexes" in + let open Index_format in + let related_uids = + List.fold_left ~init:Uid_map.empty config.merlin.index_files + ~f:(fun acc index_file -> + try + let index = Index_cache.read index_file in + Uid_map.union + (fun _ a b -> Some (Union_find.union ~f:Uid_set.union a b)) + index.related_uids acc + with Index_format.Not_an_index _ | Sys_error _ -> + log ~title "Could not load index %s" index_file; + acc) + in + Uid_map.find_opt uid related_uids + |> Option.value_map ~default:[] ~f:(fun x -> + x |> Union_find.get |> Uid_set.to_list) + +let find_linked_uids ~config ~scope ~name uid = let title = "find_linked_uids" in match uid with - | Shape.Uid.Item { from = _; comp_unit; _ } -> ( - let config = + | Shape.Uid.Item { from = _; comp_unit; _ } -> + let locate_config = { Locate.mconfig = config; ml_or_mli = `ML; traverse_aliases = false } in - match Locate.get_linked_uids ~config ~comp_unit uid with - | [ uid' ] -> - log ~title "Found linked uid: %a" Logger.fmt (fun fmt -> - Shape.Uid.print fmt uid'); - let name_check = - Locate.lookup_uid_loc_of_decl ~config:config.mconfig uid' - |> Option.value_map - ~f:(fun { Location.txt; _ } -> String.equal name txt) - ~default:false - in - if name_check then [ uid' ] else [] - | _ -> []) + let check_name uid = + Locate.lookup_uid_loc_of_decl ~config uid + |> Option.value_map + ~f:(fun { Location.txt; _ } -> + let result = String.equal name txt in + if not result then + log ~title "Found clashing idents %S <> %S. Ignoring UID %a." + name txt Logger.fmt + (Fun.flip Shape.Uid.print uid); + result) + ~default:false + in + let related_uids = + match scope with + | `Buffer -> [] + | `Project -> Locate.get_linked_uids ~config:locate_config ~comp_unit uid + | `Renaming -> lookup_related_uids_in_indexes ~config uid + in + log ~title "Found related uids: [%a]" Logger.fmt (fun fmt -> + List.iter ~f:(fprintf fmt "%a;" Shape.Uid.print) related_uids); + List.filter ~f:check_name related_uids | _ -> [] let locs_of ~config ~env ~typer_result ~pos ~scope path = @@ -301,7 +334,7 @@ let locs_of ~config ~env ~typer_result ~pos ~scope path = let name = String.split_on_char ~sep:'.' path |> List.last |> Option.get in - let additional_uids = find_linked_uids ~config ~name def_uid in + let additional_uids = find_linked_uids ~config ~scope ~name def_uid in List.concat_map (def_uid :: additional_uids) ~f:(get_external_locs ~config ~current_buffer_path) @@ -316,43 +349,33 @@ let locs_of ~config ~env ~typer_result ~pos ~scope path = let occurrences = Occurrence_set.union buffer_occurrences external_occurrences in - let occurrences = Occurrence_set.to_list occurrences in - log ~title:"occurrences" "Found %i locs" (List.length occurrences); - let occurrences = - List.filter_map occurrences - ~f:(fun (({ txt; loc } : _ Location.loc), staleness) -> - (* Canonoicalize filenames. Some of the paths may have redundant `.`s or `..`s in - them. Although canonicalizing is not necessary for correctness, it makes the - output a bit nicer. *) - let file = - Misc.canonicalize_filename ?cwd:config.merlin.source_root - loc.loc_start.pos_fname - in - let loc = set_fname ~file loc in - let lid = try Longident.head txt with _ -> "not flat lid" in - log ~title:"occurrences" "Found occ: %s %a" lid Logger.fmt - (Fun.flip Location.print_loc loc); - (* Merlin-jst: See comment at the commented-out definition of last_loc for - explanation of why this is commented out. *) - (* let loc = last_loc loc txt in *) - let fname = loc.Location.loc_start.Lexing.pos_fname in - let loc = - if not (Filename.is_relative fname) then Some loc - else - match config.merlin.source_root with - | Some path -> - let file = Filename.concat path loc.loc_start.pos_fname in - Some (set_fname ~file loc) - | None -> begin - match Locate.find_source ~config loc fname with - | `Found (file, _) -> Some (set_fname ~file loc) - | `File_not_found msg -> - log ~title:"occurrences" "%s" msg; - None - end - in - Option.map loc ~f:(fun loc : Query_protocol.occurrence -> - { loc; is_stale = Staleness.is_stale staleness })) + let locs = Lid_set.map canonicalize_file_in_loc locs in + let locs = + log ~title:"occurrences" "Found %i locs" (Lid_set.cardinal locs); + Lid_set.elements locs + |> List.filter_map ~f:(fun { Location.txt; loc } -> + let lid = try Longident.head txt with _ -> "not flat lid" in + log ~title:"occurrences" "Found occ: %s %a" lid Logger.fmt + (Fun.flip Location.print_loc loc); + let loc = + if scope = `Renaming then last_loc_for_renaming loc txt + else Some loc + in + Option.bind loc ~f:(fun loc -> + let fname = loc.Location.loc_start.Lexing.pos_fname in + if not (Filename.is_relative fname) then Some loc + else + match config.merlin.source_root with + | Some path -> + let file = Filename.concat path loc.loc_start.pos_fname in + Some (set_fname ~file loc) + | None -> begin + match Locate.find_source ~config loc fname with + | `Found (file, _) -> Some (set_fname ~file loc) + | `File_not_found msg -> + log ~title:"occurrences" "%s" msg; + None + end)) in let def_uid_is_in_current_unit = let uid_comp_unit = comp_unit_of_uid def_uid in @@ -361,9 +384,9 @@ let locs_of ~config ~env ~typer_result ~pos ~scope path = in let status = match (scope, String.Set.to_list out_of_sync_files) with - | `Project, [] -> `Included - | `Project, l -> `Out_of_sync l | `Buffer, _ -> `Not_requested + | _, [] -> `Included + | _, l -> `Out_of_sync l in if not def_uid_is_in_current_unit then { occurrences; status } else diff --git a/src/analysis/occurrences.mli b/src/analysis/occurrences.mli index 96f57d46c..ea3ff19af 100644 --- a/src/analysis/occurrences.mli +++ b/src/analysis/occurrences.mli @@ -8,6 +8,6 @@ val locs_of : env:Env.t -> typer_result:Mtyper.result -> pos:Lexing.position -> - scope:[ `Project | `Buffer ] -> + scope:[ `Project | `Buffer | `Renaming ] -> string -> t diff --git a/src/commands/new_commands.ml b/src/commands/new_commands.ml index 24738059d..4c30fc537 100644 --- a/src/commands/new_commands.ml +++ b/src/commands/new_commands.ml @@ -522,6 +522,7 @@ let all_commands = match scope with | "buffer" -> (pos, `Buffer) | "project" -> (pos, `Project) + | "renaming" -> (pos, `Renaming) | _ -> failwith "-scope should be one of buffer or project")) ] ~doc: diff --git a/src/commands/query_json.ml b/src/commands/query_json.ml index b85de15ea..8cf930271 100644 --- a/src/commands/query_json.ml +++ b/src/commands/query_json.ml @@ -206,7 +206,8 @@ let dump (type a) : a t -> json = ( "scope", match scope with | `Buffer -> `String "local" - | `Project -> `String "project" ) + | `Project -> `String "project" + | `Renaming -> `String "renaming" ) ] | Refactor_open (action, pos) -> mk "refactor-open" @@ -511,12 +512,9 @@ let json_of_response (type a) (query : a t) (response : a) : json = | Findlib_list, strs -> `List (List.map ~f:Json.string strs) | Extension_list _, strs -> `List (List.map ~f:Json.string strs) | Path_list _, strs -> `List (List.map ~f:Json.string strs) - | Occurrences (_, scope), (occurrences, _project) -> - let with_file = scope = `Project in - `List - (List.map occurrences ~f:(fun occurrence -> - with_location ~with_file occurrence.loc - [ ("stale", Json.bool occurrence.is_stale) ])) + | Occurrences (_, scope), (locations, _project) -> + let with_file = scope = `Project || scope = `Renaming in + `List (List.map locations ~f:(fun loc -> with_location ~with_file loc [])) | Signature_help _, s -> json_of_signature_help s | Version, (version, magic_numbers) -> `Assoc diff --git a/src/frontend/query_protocol.ml b/src/frontend/query_protocol.ml index 5f60dfe19..4239fa6cc 100644 --- a/src/frontend/query_protocol.ml +++ b/src/frontend/query_protocol.ml @@ -268,8 +268,8 @@ type _ t = | Extension_list : [ `All | `Enabled | `Disabled ] -> string list t | Path_list : [ `Build | `Source ] -> string list t | Occurrences (* *) : - [ `Ident_at of Msource.position ] * [ `Project | `Buffer ] - -> (occurrence list * occurrences_status) t + [ `Ident_at of Msource.position ] * [ `Project | `Buffer | `Renaming ] + -> (Location.t list * occurrences_status) t | Signature_help : signature_help -> signature_help_result option t (** In current version, Merlin only uses the parameter [position] to answer signature_help queries. The additionnal parameters are described in the diff --git a/src/index-format/index_format.ml b/src/index-format/index_format.ml index ede54ddc3..bc2906fbc 100644 --- a/src/index-format/index_format.ml +++ b/src/index-format/index_format.ml @@ -18,6 +18,7 @@ end module Lid_set = Set.Make (Lid) module Uid_map = Shape.Uid.Map module Stats = Map.Make (String) +module Uid_set = Shape.Uid.Set let add map uid locs = Uid_map.update uid @@ -33,7 +34,8 @@ type index = approximated : Lid_set.t Uid_map.t; cu_shape : (Compilation_unit.t, Shape.t) Hashtbl.t; stats : stat Stats.t; - root_directory : string option + root_directory : string option; + related_uids : Uid_set.t Union_find.element Uid_map.t } let pp_partials (fmt : Format.formatter) (partials : Lid_set.t Uid_map.t) = @@ -52,6 +54,26 @@ let pp_partials (fmt : Format.formatter) (partials : Lid_set.t Uid_map.t) = partials; Format.fprintf fmt "@]}" +let pp_related_uids (fmt : Format.formatter) + (related_uids : Uid_set.t Union_find.element Uid_map.t) = + let rec gather acc map = + match Uid_map.choose_opt map with + | Some (_key, union) -> + let group = Union_find.get union |> Uid_set.to_list in + List.fold_left (fun acc key -> Uid_map.remove key acc) map group + |> gather (group :: acc) + | None -> acc + in + Format.pp_print_list + ~pp_sep:(fun fmt () -> Format.fprintf fmt ";@;") + (fun fmt group -> + Format.fprintf fmt "(%a)" + (Format.pp_print_list + ~pp_sep:(fun fmt () -> Format.fprintf fmt "@ ") + Shape.Uid.print) + group) + fmt (gather [] related_uids) + let pp (fmt : Format.formatter) pl = Format.fprintf fmt "%i uids:@ {@[" (Uid_map.cardinal pl.defs); Uid_map.iter @@ -74,7 +96,8 @@ let pp (fmt : Format.formatter) pl = (String.concat ";@," (Hashtbl.to_seq_keys pl.cu_shape |> List.of_seq - |> List.map Compilation_unit.full_path_as_string)) + |> List.map Compilation_unit.full_path_as_string)); + Format.fprintf fmt "and related uids:@[{%a}@]" pp_related_uids pl.related_uids let ext = "ocaml-index" diff --git a/src/index-format/index_format.mli b/src/index-format/index_format.mli index 9cc212969..a6a30775d 100644 --- a/src/index-format/index_format.mli +++ b/src/index-format/index_format.mli @@ -7,6 +7,7 @@ module Lid : Set.OrderedType with type t = Longident.t Location.loc module Lid_set : Set.S with type elt = Lid.t module Stats : Map.S with type key = String.t module Uid_map = Shape.Uid.Map +module Uid_set = Shape.Uid.Set type stat = { mtime : float; size : int; source_digest : string option } @@ -15,7 +16,8 @@ type index = approximated : Lid_set.t Uid_map.t; cu_shape : (Compilation_unit.t, Shape.t) Hashtbl.t; stats : stat Stats.t; - root_directory : string option + root_directory : string option; + related_uids : Uid_set.t Union_find.element Uid_map.t } val pp : Format.formatter -> index -> unit diff --git a/src/index-format/union_find.ml b/src/index-format/union_find.ml new file mode 100644 index 000000000..e2d2bb47d --- /dev/null +++ b/src/index-format/union_find.ml @@ -0,0 +1,40 @@ +type 'a content = + | Root of { mutable value : 'a; mutable rank : int } + | Link of { mutable parent : 'a element } +and 'a element = 'a content ref + +let make value = ref (Root { value; rank = 0 }) + +let rec find x = + match !x with + | Root _ -> x + | Link ({ parent; _ } as link) -> + let root = find parent in + if root != parent then link.parent <- root; + root + +let union ~f x y = + let x = find x in + let y = find y in + if x == y then x + else begin + match (!x, !y) with + | ( Root ({ rank = rank_x; value = value_x } as root_x), + Root ({ rank = rank_y; value = value_y } as root_y) ) -> + let new_value = f value_x value_y in + if rank_x < rank_y then ( + x := Link { parent = y }; + root_y.value <- new_value; + y) + else ( + y := Link { parent = x }; + root_x.value <- new_value; + if rank_x = rank_y then root_x.rank <- root_x.rank + 1; + x) + | _ -> assert false + end + +let get elt = + match !(find elt) with + | Root { value; _ } -> value + | Link _ -> assert false diff --git a/src/ocaml-index/lib/index.ml b/src/ocaml-index/lib/index.ml index 29213a371..d73e3ae0d 100644 --- a/src/ocaml-index/lib/index.ml +++ b/src/ocaml-index/lib/index.ml @@ -105,7 +105,7 @@ let init_load_path_once ~do_not_use_cmt_loadpath = let index_of_artifact ~into ~root ~rewrite_root ~build_path ~do_not_use_cmt_loadpath ~shapes ~cmt_loadpath ~cmt_impl_shape ~cmt_modname ~uid_to_loc ~cmt_ident_occurrences ~cmt_initial_env ~cmt_sourcefile - ~cmt_source_digest = + ~cmt_source_digest ~cmt_declaration_dependencies = init_load_path_once ~do_not_use_cmt_loadpath ~dirs:build_path cmt_loadpath; let module Reduce = Shape_reduce.Make (Reduce_conf (struct let shapes = shapes @@ -149,7 +149,26 @@ let index_of_artifact ~into ~root ~rewrite_root ~build_path into.stats with Unix.Unix_error _ -> into.stats) in - { defs; approximated; cu_shape; stats; root_directory = into.root_directory } + let related_uids = + List.fold_left + (fun acc (_, uid1, uid2) -> + let union = Union_find.make (Uid_set.of_list [ uid1; uid2 ]) in + let map_update uid = + Uid_map.update uid (function + | None -> Some union + | Some union' -> + Some (Union_find.union ~f:Uid_set.union union' union)) + in + acc |> map_update uid1 |> map_update uid2) + into.related_uids cmt_declaration_dependencies + in + { defs; + approximated; + cu_shape; + stats; + related_uids; + root_directory = into.root_directory + } let shape_of_artifact ~impl_shape ~modname = let cu_shape = Hashtbl.create 1 in @@ -158,7 +177,8 @@ let shape_of_artifact ~impl_shape ~modname = approximated = Shape.Uid.Map.empty; cu_shape; stats = Stats.empty; - root_directory = None + root_directory = None; + related_uids = Uid_map.empty } let shape_of_cmt { Cmt_format.cmt_impl_shape; cmt_modname; _ } = @@ -176,6 +196,7 @@ let index_of_cmt ~root ~build_path ~shapes cmt_infos = cmt_initial_env; cmt_sourcefile; cmt_source_digest; + cmt_declaration_dependencies; _ } = cmt_infos @@ -188,7 +209,7 @@ let index_of_cmt ~root ~build_path ~shapes cmt_infos = in index_of_artifact ~root ~build_path ~shapes ~cmt_loadpath ~cmt_impl_shape ~cmt_modname ~uid_to_loc ~cmt_ident_occurrences ~cmt_initial_env - ~cmt_sourcefile ~cmt_source_digest + ~cmt_sourcefile ~cmt_source_digest ~cmt_declaration_dependencies let index_of_cms ~root ~build_path ~shapes cms_infos = let { Cms_format.cms_impl_shape; @@ -198,6 +219,7 @@ let index_of_cms ~root ~build_path ~shapes cms_infos = cms_sourcefile; cms_source_digest; cms_initial_env; + cms_declaration_dependencies; _ } = cms_infos @@ -213,14 +235,20 @@ let index_of_cms ~root ~build_path ~shapes cms_infos = ~cmt_ident_occurrences:cms_ident_occurrences ~cmt_initial_env:(Option.value cms_initial_env ~default:Env.empty) ~cmt_sourcefile:cms_sourcefile ~cmt_source_digest:cms_source_digest + ~cmt_declaration_dependencies:cms_declaration_dependencies let merge_index ~store_shapes ~into index = let defs = merge index.defs into.defs in let approximated = merge index.approximated into.approximated in let stats = Stats.union (fun _ f1 _f2 -> Some f1) into.stats index.stats in + let related_uids = + Uid_map.union + (fun _ a b -> Some (Union_find.union ~f:Uid_set.union a b)) + index.related_uids into.related_uids + in if store_shapes then Hashtbl.add_seq into.cu_shape (Hashtbl.to_seq index.cu_shape); - { into with defs; approximated; stats } + { into with defs; approximated; stats; related_uids } let from_files ~store_shapes ~output_file ~root ~rewrite_root ~build_path ~do_not_use_cmt_loadpath files = @@ -230,7 +258,8 @@ let from_files ~store_shapes ~output_file ~root ~rewrite_root ~build_path approximated = Shape.Uid.Map.empty; cu_shape = Hashtbl.create 64; stats = Stats.empty; - root_directory = root + root_directory = root; + related_uids = Uid_map.empty } in let final_index = @@ -273,7 +302,8 @@ let gather_shapes ~output_file files = approximated = Shape.Uid.Map.empty; cu_shape = Hashtbl.create 64; stats = Stats.empty; - root_directory = None + root_directory = None; + related_uids = Uid_map.empty } in let final_index = diff --git a/src/ocaml-index/tests/tests-dirs/demonstate-cms-deps.t b/src/ocaml-index/tests/tests-dirs/demonstate-cms-deps.t index d17ee8eaf..73a886e75 100644 --- a/src/ocaml-index/tests/tests-dirs/demonstate-cms-deps.t +++ b/src/ocaml-index/tests/tests-dirs/demonstate-cms-deps.t @@ -37,6 +37,7 @@ files include the load-path in them: {uid: Bar.0; locs: "Foo.Bar.x": File "main.ml", line 1, characters 14-23 uid: Main.0; locs: "x": File "main.ml", line 1, characters 4-5 }, 0 approx shapes: {}, and shapes for CUS . + and related uids:{} If we use cms files and don't include dependencies, ocaml-index will fail to index identifiers from dependencies: @@ -44,6 +45,7 @@ identifiers from dependencies: $ ocaml-index dump main.uideps 1 uids: {uid: Main.0; locs: "x": File "main.ml", line 1, characters 4-5 }, 0 approx shapes: {}, and shapes for CUS . + and related uids:{} If we pass a hidden dependency as a visible one, we can run into trouble. Note that ocaml-index believes that "Foo.Bar.x" comes from Foo rather than Bar: @@ -53,6 +55,7 @@ ocaml-index believes that "Foo.Bar.x" comes from Foo rather than Bar: {uid: Foo.0; locs: "Foo.Bar.x": File "main.ml", line 1, characters 14-23 uid: Main.0; locs: "x": File "main.ml", line 1, characters 4-5 }, 0 approx shapes: {}, and shapes for CUS . + and related uids:{} If we pass dependencies, we get the correct results: $ ocaml-index aggregate -o main.uideps main.cms -H hidden_lib -I visible_lib @@ -61,6 +64,7 @@ If we pass dependencies, we get the correct results: {uid: Bar.0; locs: "Foo.Bar.x": File "main.ml", line 1, characters 14-23 uid: Main.0; locs: "x": File "main.ml", line 1, characters 4-5 }, 0 approx shapes: {}, and shapes for CUS . + and related uids:{} Lastly, check that ocaml-index disambiguates based on order the same as the compiler. Since visible_lib comes first, "Foo" in main.ml corresponds to visible_lib/foo.ml: @@ -71,3 +75,4 @@ Since visible_lib comes first, "Foo" in main.ml corresponds to visible_lib/foo.m {uid: Bar.0; locs: "Foo.Bar.x": File "main.ml", line 1, characters 14-23 uid: Main.0; locs: "x": File "main.ml", line 1, characters 4-5 }, 0 approx shapes: {}, and shapes for CUS . + and related uids:{} diff --git a/src/ocaml-index/tests/tests-dirs/gather-shapes.t b/src/ocaml-index/tests/tests-dirs/gather-shapes.t index 0e9aafee2..cbcc04f62 100644 --- a/src/ocaml-index/tests/tests-dirs/gather-shapes.t +++ b/src/ocaml-index/tests/tests-dirs/gather-shapes.t @@ -39,6 +39,7 @@ us to avoid loading the cms files of dependencies. uid: Main.2; locs: "b": File "main.ml", line 3, characters 4-5 uid: Main.3; locs: "Bar": File "main.ml", line 4, characters 7-10 }, 0 approx shapes: {}, and shapes for CUS . + and related uids:{} Order matters; if we don't load the shapes file before the cms, we will fail to index properly: @@ -50,3 +51,4 @@ properly: uid: Main.2; locs: "b": File "main.ml", line 3, characters 4-5 uid: Main.3; locs: "Bar": File "main.ml", line 4, characters 7-10 }, 0 approx shapes: {}, and shapes for CUS . + and related uids:{} diff --git a/src/ocaml-index/tests/tests-dirs/index-project.t b/src/ocaml-index/tests/tests-dirs/index-project.t index 702c90b6b..2efd1b626 100644 --- a/src/ocaml-index/tests/tests-dirs/index-project.t +++ b/src/ocaml-index/tests/tests-dirs/index-project.t @@ -58,6 +58,7 @@ "+": File "main.ml", line 2, characters 14-15; "+": File "main.ml", line 4, characters 26-27 }, 0 approx shapes: {}, and shapes for CUS . + and related uids:{} $ ocaml-index dump foo.uideps 5 uids: @@ -71,6 +72,7 @@ "+": File "foo.ml", line 3, characters 11-12; "+": File "foo.ml", line 3, characters 19-20 }, 0 approx shapes: {}, and shapes for CUS . + and related uids:{} @@ -116,6 +118,7 @@ "+": File "main.ml", line 2, characters 14-15; "+": File "main.ml", line 4, characters 26-27 }, 0 approx shapes: {}, and shapes for CUS . + and related uids:{} $ ocaml-index stats foo.uideps test.uideps Index "foo.uideps" contains: diff --git a/src/ocaml-index/tests/tests-dirs/interfaces.t b/src/ocaml-index/tests/tests-dirs/interfaces.t index c72eda193..e86247f76 100644 --- a/src/ocaml-index/tests/tests-dirs/interfaces.t +++ b/src/ocaml-index/tests/tests-dirs/interfaces.t @@ -22,3 +22,4 @@ uid: Stdlib__Float.81; locs: "Float.t": File "main.mli", line 1, characters 9-16 }, 0 approx shapes: {}, and shapes for CUS . + and related uids:{} diff --git a/src/ocaml-index/tests/tests-dirs/local-shape-and-include.t b/src/ocaml-index/tests/tests-dirs/local-shape-and-include.t index 0f2984c59..ddab394bd 100644 --- a/src/ocaml-index/tests/tests-dirs/local-shape-and-include.t +++ b/src/ocaml-index/tests/tests-dirs/local-shape-and-include.t @@ -36,6 +36,7 @@ uid: Stdlib__String.173; locs: "String.equal": File "main.ml", line 1, characters 8-20 }, 0 approx shapes: {}, and shapes for CUS . + and related uids:{(Main.3 Main.4)} $ ocaml-index dump test.uideps @@ -53,4 +54,5 @@ uid: Stdlib__String.173; locs: "String.equal": File "main.ml", line 1, characters 8-20 }, 0 approx shapes: {}, and shapes for CUS . + and related uids:{(Main.3 Main.4)} diff --git a/src/ocaml-index/tests/tests-dirs/transitive-deps-cms.t b/src/ocaml-index/tests/tests-dirs/transitive-deps-cms.t index bffc41372..fe6d07383 100644 --- a/src/ocaml-index/tests/tests-dirs/transitive-deps-cms.t +++ b/src/ocaml-index/tests/tests-dirs/transitive-deps-cms.t @@ -38,11 +38,13 @@ between visible and hidden dependencies: uid: Stdlib__List.45; locs: "List.init": File "main.ml", line 1, characters 8-17 }, 0 approx shapes: {}, and shapes for CUS . + and related uids:{} $ ocaml-index dump lib1/foo.uideps 1 uids: {uid: Bar; locs: "Bar": File "lib1/foo.ml", line 1, characters 8-11 }, 0 approx shapes: {}, and shapes for CUS . + and related uids:{} $ ocaml-index dump test.uideps 5 uids: @@ -55,4 +57,5 @@ between visible and hidden dependencies: uid: Stdlib__List.45; locs: "List.init": File "main.ml", line 1, characters 8-17 }, 0 approx shapes: {}, and shapes for CUS . + and related uids:{} diff --git a/src/ocaml-index/tests/tests-dirs/transitive-deps.t b/src/ocaml-index/tests/tests-dirs/transitive-deps.t index c3a754cbc..8846a5984 100644 --- a/src/ocaml-index/tests/tests-dirs/transitive-deps.t +++ b/src/ocaml-index/tests/tests-dirs/transitive-deps.t @@ -33,11 +33,13 @@ uid: Stdlib__List.45; locs: "List.init": File "main.ml", line 1, characters 8-17 }, 0 approx shapes: {}, and shapes for CUS . + and related uids:{} $ ocaml-index dump lib1/foo.uideps 1 uids: {uid: Bar; locs: "Bar": File "lib1/foo.ml", line 1, characters 8-11 }, 0 approx shapes: {}, and shapes for CUS . + and related uids:{} $ ocaml-index dump test.uideps 5 uids: @@ -50,4 +52,5 @@ uid: Stdlib__List.45; locs: "List.init": File "main.ml", line 1, characters 8-17 }, 0 approx shapes: {}, and shapes for CUS . + and related uids:{} diff --git a/tests/test-dirs/occurrences/project-wide/for-renaming/r-modules-and-types.t b/tests/test-dirs/occurrences/project-wide/for-renaming/r-modules-and-types.t new file mode 100644 index 000000000..b25fafd30 --- /dev/null +++ b/tests/test-dirs/occurrences/project-wide/for-renaming/r-modules-and-types.t @@ -0,0 +1,90 @@ + $ cat >lib.mli <<'EOF' + > module type S = sig + > val x : unit + > end + > EOF + + $ cat >lib.ml <<'EOF' + > module type S = sig + > val x : unit + > end + > EOF + + $ cat >main.ml <<'EOF' + > module M : Lib.S = struct + > let x = () + > end + > let () = M.x + > EOF + + $ $OCAMLC -bin-annot -bin-annot-occurrences -c lib.mli lib.ml main.ml + $ ocaml-index aggregate *.cmti *.cmt + + $ ocaml-index dump project.ocaml-index + 6 uids: + {uid: [intf]Lib.0; locs: "x": File "lib.mli", line 2, characters 6-7 + uid: Lib.0; locs: "x": File "lib.ml", line 2, characters 6-7 + uid: Main.0; locs: + "x": File "main.ml", line 2, characters 6-7; + "M.x": File "main.ml", line 4, characters 9-12 + uid: [intf]Lib.1; locs: "S": File "lib.mli", line 1, characters 12-13 + uid: Lib.1; locs: + "S": File "lib.ml", line 1, characters 12-13; + "Lib.S": File "main.ml", line 1, characters 11-16 + uid: Main.1; locs: "M": File "main.ml", line 1, characters 7-8 }, + 0 approx shapes: {}, and shapes for CUS . + and related uids:{([intf]Lib.1 Lib.1); ([intf]Lib.0 Lib.0 Main.0)} + + $ $MERLIN single occurrences -scope renaming -identifier-at 4:11 \ + > -index-file project.ocaml-index \ + > -filename main.ml -filename main.ml -index-file project.ocaml-index \ diff --git a/tests/test-dirs/occurrences/project-wide/pwo-ml-gen.t b/tests/test-dirs/occurrences/project-wide/pwo-ml-gen.t index b3c0a34e4..b1c7a824e 100644 --- a/tests/test-dirs/occurrences/project-wide/pwo-ml-gen.t +++ b/tests/test-dirs/occurrences/project-wide/pwo-ml-gen.t @@ -43,6 +43,7 @@ We should not index generated modules (lib.ml-gen) "foo": File "lib/aux.ml", line 1, characters 4-7; "foo": File "lib/aux.ml", line 2, characters 8-11 }, 0 approx shapes: {}, and shapes for CUS . + and related uids:{} $ ocaml-index dump _build/default/.main.eobjs/cctx.ocaml-index 4 uids: @@ -52,6 +53,7 @@ We should not index generated modules (lib.ml-gen) uid: Stdlib.312; locs: "print_string": File "main.ml", line 3, characters 9-21 }, 0 approx shapes: {}, and shapes for CUS . + and related uids:{} $ $MERLIN single occurrences -scope project -identifier-at 3:23 \ > -filename main.ml main/.merlin << EOF diff --git a/tests/test-dirs/server-tests/pwo-uid-stability.t b/tests/test-dirs/server-tests/pwo-uid-stability.t index bff5e1ce9..c3d4919e0 100644 --- a/tests/test-dirs/server-tests/pwo-uid-stability.t +++ b/tests/test-dirs/server-tests/pwo-uid-stability.t @@ -19,6 +19,7 @@ "z": File "lib.ml", line 3, characters 4-5; "Lib.z": File "main.ml", line 1, characters 9-14 }, 0 approx shapes: {}, and shapes for CUS . + and related uids:{} $ $MERLIN server occurrences -identifier-at 3:4 \ @@ -68,6 +69,7 @@ Now we insert a def before z: "z": File "lib.ml", line 3, characters 4-5; "Lib.z": File "main.ml", line 1, characters 9-14 }, 0 approx shapes: {}, and shapes for CUS . + and related uids:{} We are not missing the occurrence in main.ml $ $MERLIN server occurrences -identifier-at 3:4 \ @@ -115,6 +117,7 @@ We are not missing the occurrence in main.ml "z": File "lib.ml", line 3, characters 4-5; "Lib.z": File "main.ml", line 1, characters 9-14 }, 0 approx shapes: {}, and shapes for CUS . + and related uids:{} We are not missing the occurrence in main.ml $ $MERLIN server occurrences -identifier-at 3:4 \ From e9e292df7dc582e200f9e265e493c9d4ba25e183 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ulysse=20G=C3=A9rard?= Date: Wed, 9 Apr 2025 18:20:28 -0400 Subject: [PATCH 5/7] Merge changes adding granular marshal (ocaml/merlin#1889) --- CHANGES.md | 2 + src/analysis/occurrences.ml | 28 +- src/dot-merlin/dot_merlin_reader.ml | 1 - src/index-format/granular_map.ml | 308 ++++++++++++++++++ src/index-format/granular_map.mli | 44 +++ src/index-format/granular_marshal.ml | 191 +++++++++++ src/index-format/granular_marshal.mli | 65 ++++ src/index-format/granular_set.ml | 278 ++++++++++++++++ src/index-format/granular_set.mli | 36 ++ src/index-format/index_format.ml | 103 +++--- src/index-format/index_format.mli | 20 +- src/index-format/lid.ml | 71 ++++ src/ocaml-index/lib/dune | 19 +- src/ocaml-index/lib/index.ml | 30 +- src/ocaml/utils/directory_content_cache.ml | 1 - src/utils/file_cache.ml | 3 +- .../occurrences/project-wide/union.t | 54 +++ 17 files changed, 1172 insertions(+), 82 deletions(-) create mode 100644 src/index-format/granular_map.ml create mode 100644 src/index-format/granular_map.mli create mode 100644 src/index-format/granular_marshal.ml create mode 100644 src/index-format/granular_marshal.mli create mode 100644 src/index-format/granular_set.ml create mode 100644 src/index-format/granular_set.mli create mode 100644 src/index-format/lid.ml create mode 100644 tests/test-dirs/occurrences/project-wide/union.t diff --git a/CHANGES.md b/CHANGES.md index a7988d1b2..dcb47870e 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -9,6 +9,8 @@ unreleased - Perform less merges in the indexer (#1881) - Add initial support for project-wide renaming: occurrences can now return all usages of all related definitions. (#1877) + + ocaml-index + - Bump magic number after index file format change. Index can now be read lazilly (#1886) merlin 5.2 ========== diff --git a/src/analysis/occurrences.ml b/src/analysis/occurrences.ml index 79062e2ae..32576594d 100644 --- a/src/analysis/occurrences.ml +++ b/src/analysis/occurrences.ml @@ -179,7 +179,9 @@ end let get_buffer_locs result uid = Stamped_hashtable.fold (fun (uid', loc) () acc -> - if Shape.Uid.equal uid uid' then Lid_set.add loc acc else acc) + if Shape.Uid.equal uid uid' then + Lid_set.add (Index_format.Lid.of_lid loc) acc + else acc) (Mtyper.get_index result) Lid_set.empty let get_external_locs ~(config : Mconfig.t) ~current_buffer_path uid = @@ -200,7 +202,10 @@ let get_external_locs ~(config : Mconfig.t) ~current_buffer_path uid = Option.map external_locs ~f:(fun (index, locs) -> let stats = Stat_check.create ~cache_size:128 index in ( Lid_set.filter - (fun ({ loc; _ } as lid) -> + (fun lid -> + let ({ Location.loc; _ } as lid) = + Index_format.Lid.to_lid lid + in let is_current_buffer = (* We filter external results that concern the current buffer *) let file = loc.Location.loc_start.Lexing.pos_fname in @@ -233,12 +238,12 @@ let lookup_related_uids_in_indexes ~(config : Mconfig.t) uid = let title = "lookup_related_uids_in_indexes" in let open Index_format in let related_uids = - List.fold_left ~init:Uid_map.empty config.merlin.index_files + List.fold_left ~init:(Uid_map.empty ()) config.merlin.index_files ~f:(fun acc index_file -> try let index = Index_cache.read index_file in Uid_map.union - (fun _ a b -> Some (Union_find.union ~f:Uid_set.union a b)) + (fun _ a b -> Some (Union_find.union a b)) index.related_uids acc with Index_format.Not_an_index _ | Sys_error _ -> log ~title "Could not load index %s" index_file; @@ -346,14 +351,23 @@ let locs_of ~config ~env ~typer_result ~pos ~scope path = (Occurrence_set.union acc_locs locs, String.Set.union acc_files files)) external_occurrences in - let occurrences = - Occurrence_set.union buffer_occurrences external_occurrences + let locs = Lid_set.union buffer_locs external_locs in + (* Some of the paths may have redundant `.`s or `..`s in them. Although canonicalizing + is not necessary for correctness, it makes the output a bit nicer. *) + let canonicalize_file_in_loc lid = + let ({ txt; loc } : 'a Location.loc) = Index_format.Lid.to_lid lid in + let file = + Misc.canonicalize_filename ?cwd:config.merlin.source_root + loc.loc_start.pos_fname + in + Index_format.Lid.of_lid { txt; loc = set_fname ~file loc } in let locs = Lid_set.map canonicalize_file_in_loc locs in let locs = log ~title:"occurrences" "Found %i locs" (Lid_set.cardinal locs); Lid_set.elements locs - |> List.filter_map ~f:(fun { Location.txt; loc } -> + |> List.filter_map ~f:(fun lid -> + let { Location.txt; loc } = Index_format.Lid.to_lid lid in let lid = try Longident.head txt with _ -> "not flat lid" in log ~title:"occurrences" "Found occ: %s %a" lid Logger.fmt (Fun.flip Location.print_loc loc); diff --git a/src/dot-merlin/dot_merlin_reader.ml b/src/dot-merlin/dot_merlin_reader.ml index 2c0b4338a..3feeb7d95 100644 --- a/src/dot-merlin/dot_merlin_reader.ml +++ b/src/dot-merlin/dot_merlin_reader.ml @@ -140,7 +140,6 @@ module Cache = File_cache.Make (struct | exn -> close_in_noerr ic; raise exn - let cache_name = "Mconfig_dot" end) diff --git a/src/index-format/granular_map.ml b/src/index-format/granular_map.ml new file mode 100644 index 000000000..e4d2d74c7 --- /dev/null +++ b/src/index-format/granular_map.ml @@ -0,0 +1,308 @@ +(**************************************************************************) +(* *) +(* OCaml *) +(* *) +(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *) +(* *) +(* Copyright 1996 Institut National de Recherche en Informatique et *) +(* en Automatique. *) +(* *) +(* All rights reserved. This file is distributed under the terms of *) +(* the GNU Lesser General Public License version 2.1, with the *) +(* special exception on linking described in the file LICENSE. *) +(* *) +(**************************************************************************) + +open Granular_marshal + +module type S = sig + type key + type 'a t + + val empty : unit -> 'a t + val bindings : 'a t -> (key * 'a) list + val add : key -> 'a -> 'a t -> 'a t + val singleton : key -> 'a -> 'a t + val remove : key -> 'a t -> 'a t + val union : (key -> 'a -> 'a -> 'a option) -> 'a t -> 'a t -> 'a t + val cardinal : 'a t -> int + val find : key -> 'a t -> 'a + val find_opt : key -> 'a t -> 'a option + val choose_opt : 'a t -> (key * 'a) option + val iter : (key -> 'a -> unit) -> 'a t -> unit + val fold : (key -> 'a -> 'acc -> 'acc) -> 'a t -> 'acc -> 'acc + val map : ('a -> 'b) -> 'a t -> 'b t + val is_empty : 'a t -> bool + val mem : key -> 'a t -> bool + val update : key -> ('a option -> 'a option) -> 'a t -> 'a t + val schema : + 'a t Type.Id.t -> + Granular_marshal.iter -> + (Granular_marshal.iter -> key -> 'a -> unit) -> + 'a t -> + unit +end + +module Make (Ord : Map.OrderedType) = struct + type key = Ord.t + type 'a t = 'a s link + and 'a s = Empty | Node of { l : 'a t; v : key; d : 'a; r : 'a t; h : int } + + let empty () = link Empty + + let height s = + match fetch s with + | Empty -> 0 + | Node { h; _ } -> h + + let create (l : 'a t) x d (r : 'a t) : 'a t = + let hl = height l and hr = height r in + link (Node { l; v = x; d; r; h = (if hl >= hr then hl + 1 else hr + 1) }) + + let singleton x d = + let empty = empty () in + link (Node { l = empty; v = x; d; r = empty; h = 1 }) + + let bal (l : 'a t) x d (r : 'a t) : 'a t = + let hl = + match fetch l with + | Empty -> 0 + | Node { h; _ } -> h + in + let hr = + match fetch r with + | Empty -> 0 + | Node { h; _ } -> h + in + if hl > hr + 2 then begin + match fetch l with + | Empty -> invalid_arg "Map.bal" + | Node { l = ll; v = lv; d = ld; r = lr; _ } -> + if height ll >= height lr then create ll lv ld (create lr x d r) + else begin + match fetch lr with + | Empty -> invalid_arg "Map.bal" + | Node { l = lrl; v = lrv; d = lrd; r = lrr; _ } -> + create (create ll lv ld lrl) lrv lrd (create lrr x d r) + end + end + else if hr > hl + 2 then begin + match fetch r with + | Empty -> invalid_arg "Map.bal" + | Node { l = rl; v = rv; d = rd; r = rr; _ } -> + if height rr >= height rl then create (create l x d rl) rv rd rr + else begin + match fetch rl with + | Empty -> invalid_arg "Map.bal" + | Node { l = rll; v = rlv; d = rld; r = rlr; _ } -> + create (create l x d rll) rlv rld (create rlr rv rd rr) + end + end + else + link (Node { l; v = x; d; r; h = (if hl >= hr then hl + 1 else hr + 1) }) + + let rec bindings_aux accu s = + match fetch s with + | Empty -> accu + | Node { l; v; d; r; _ } -> bindings_aux ((v, d) :: bindings_aux accu r) l + + let bindings t = bindings_aux [] t + + let is_empty s = + match fetch s with + | Empty -> true + | _ -> false + + let rec add x data s : 'a t = + match fetch s with + | Empty -> link (Node { l = s; v = x; d = data; r = s; h = 1 }) + | Node { l; v; d; r; h } -> + let c = Ord.compare x v in + if c = 0 then + if d == data then s else link (Node { l; v = x; d = data; r; h }) + else if c < 0 then + let ll = add x data l in + if l == ll then s else bal ll v d r + else + let rr = add x data r in + if r == rr then s else bal l v d rr + + let rec find x s = + match fetch s with + | Empty -> raise Not_found + | Node { l; v; d; r; _ } -> + let c = Ord.compare x v in + if c = 0 then d else find x (if c < 0 then l else r) + + let rec find_opt x s = + match fetch s with + | Empty -> None + | Node { l; v; d; r; _ } -> + let c = Ord.compare x v in + if c = 0 then Some d else find_opt x (if c < 0 then l else r) + + let rec mem x s = + match fetch s with + | Empty -> false + | Node { l; v; r; _ } -> + let c = Ord.compare x v in + c = 0 || mem x (if c < 0 then l else r) + + let rec min_binding (t : 'a t) : key * 'a = + match fetch t with + | Empty -> raise Not_found + | Node { l; v; d; _ } when fetch l = Empty -> (v, d) + | Node { l; _ } -> min_binding l + + let choose_opt t = try Some (min_binding t) with Not_found -> None + + let rec remove_min_binding (t : 'a t) : 'a t = + match fetch t with + | Empty -> invalid_arg "Map.remove_min_elt" + | Node { l; r; _ } when fetch l = Empty -> r + | Node { l; v; d; r; _ } -> bal (remove_min_binding l) v d r + + let merge (t1 : 'a t) (t2 : 'a t) : 'a t = + match (fetch t1, fetch t2) with + | Empty, _t -> t2 + | _t, Empty -> t1 + | _, _ -> + let x, d = min_binding t2 in + bal t1 x d (remove_min_binding t2) + + let rec remove x s : 'a t = + match fetch s with + | Empty -> s + | Node { l; v; d; r; _ } -> + let c = Ord.compare x v in + if c = 0 then merge l r + else if c < 0 then + let ll = remove x l in + if l == ll then s else bal ll v d r + else + let rr = remove x r in + if r == rr then s else bal l v d rr + + let rec iter f s = + match fetch s with + | Empty -> () + | Node { l; v; d; r; _ } -> + iter f l; + f v d; + iter f r + + let rec map f s = + match fetch s with + | Empty -> empty () + | Node { l; v; d; r; h } -> + let l' = map f l in + let d' = f d in + let r' = map f r in + link (Node { l = l'; v; d = d'; r = r'; h }) + + let rec fold f m accu = + match fetch m with + | Empty -> accu + | Node { l; v; d; r; _ } -> fold f r (f v d (fold f l accu)) + + let rec add_min_binding k x s = + match fetch s with + | Empty -> singleton k x + | Node { l; v; d; r; _ } -> bal (add_min_binding k x l) v d r + + let rec add_max_binding k x s = + match fetch s with + | Empty -> singleton k x + | Node { l; v; d; r; _ } -> bal l v d (add_max_binding k x r) + + let rec join (l : 'a t) v d (r : 'a t) = + match (fetch l, fetch r) with + | Empty, _ -> add_min_binding v d r + | _, Empty -> add_max_binding v d l + | ( Node { l = ll; v = lv; d = ld; r = lr; h = lh }, + Node { l = rl; v = rv; d = rd; r = rr; h = rh } ) -> + if lh > rh + 2 then bal ll lv ld (join lr v d r) + else if rh > lh + 2 then bal (join l v d rl) rv rd rr + else create l v d r + + let concat (t1 : 'a t) (t2 : 'a t) : 'a t = + match (fetch t1, fetch t2) with + | Empty, _t -> t2 + | _t, Empty -> t1 + | _, _ -> + let x, d = min_binding t2 in + join t1 x d (remove_min_binding t2) + + let concat_or_join t1 v d t2 = + match d with + | Some d -> join t1 v d t2 + | None -> concat t1 t2 + + let rec split x s = + match fetch s with + | Empty -> (s, None, s) + | Node { l; v; d; r; _ } -> + let c = Ord.compare x v in + if c = 0 then (l, Some d, r) + else if c < 0 then + let ll, pres, rl = split x l in + (ll, pres, join rl v d r) + else + let lr, pres, rr = split x r in + (join l v d lr, pres, rr) + + let rec union f (s1 : 'a t) (s2 : 'a t) : 'a t = + match (fetch s1, fetch s2) with + | _, Empty -> s1 + | Empty, _ -> s2 + | ( Node { l = l1; v = v1; d = d1; r = r1; h = h1 }, + Node { l = l2; v = v2; d = d2; r = r2; h = h2 } ) -> ( + if h1 >= h2 then + let l2, d2, r2 = split v1 s2 in + let l = union f l1 l2 and r = union f r1 r2 in + match d2 with + | None -> join l v1 d1 r + | Some d2 -> concat_or_join l v1 (f v1 d1 d2) r + else + let l1, d1, r1 = split v2 s1 in + let l = union f l1 l2 and r = union f r1 r2 in + match d1 with + | None -> join l v2 d2 r + | Some d1 -> concat_or_join l v2 (f v2 d1 d2) r) + + let rec cardinal s = + match fetch s with + | Empty -> 0 + | Node { l; r; _ } -> cardinal l + 1 + cardinal r + + let rec update x f t = + match fetch t with + | Empty -> begin + match f None with + | None -> t + | Some data -> link (Node { l = t; v = x; d = data; r = t; h = 1 }) + end + | Node { l; v; d; r; h } -> + let c = Ord.compare x v in + if c = 0 then begin + match f (Some d) with + | None -> merge l r + | Some data -> + if d == data then t else link (Node { l; v = x; d = data; r; h }) + end + else if c < 0 then + let ll = update x f l in + if l == ll then t else bal ll v d r + else + let rr = update x f r in + if r == rr then t else bal l v d rr + + let rec schema type_id iter f m = + iter.yield m type_id @@ fun iter tree -> + match tree with + | Empty -> () + | Node { l; v; d; r; _ } -> + schema type_id iter f l; + f iter v d; + schema type_id iter f r +end diff --git a/src/index-format/granular_map.mli b/src/index-format/granular_map.mli new file mode 100644 index 000000000..efb004047 --- /dev/null +++ b/src/index-format/granular_map.mli @@ -0,0 +1,44 @@ +(**************************************************************************) +(* *) +(* OCaml *) +(* *) +(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *) +(* *) +(* Copyright 1996 Institut National de Recherche en Informatique et *) +(* en Automatique. *) +(* *) +(* All rights reserved. This file is distributed under the terms of *) +(* the GNU Lesser General Public License version 2.1, with the *) +(* special exception on linking described in the file LICENSE. *) +(* *) +(**************************************************************************) + +module type S = sig + type key + type 'a t + + val empty : unit -> 'a t + val bindings : 'a t -> (key * 'a) list + val add : key -> 'a -> 'a t -> 'a t + val singleton : key -> 'a -> 'a t + val remove : key -> 'a t -> 'a t + val union : (key -> 'a -> 'a -> 'a option) -> 'a t -> 'a t -> 'a t + val cardinal : 'a t -> int + val find : key -> 'a t -> 'a + val find_opt : key -> 'a t -> 'a option + val choose_opt : 'a t -> (key * 'a) option + val iter : (key -> 'a -> unit) -> 'a t -> unit + val fold : (key -> 'a -> 'acc -> 'acc) -> 'a t -> 'acc -> 'acc + val map : ('a -> 'b) -> 'a t -> 'b t + val is_empty : 'a t -> bool + val mem : key -> 'a t -> bool + val update : key -> ('a option -> 'a option) -> 'a t -> 'a t + val schema : + 'a t Type.Id.t -> + Granular_marshal.iter -> + (Granular_marshal.iter -> key -> 'a -> unit) -> + 'a t -> + unit +end + +module Make (Ord : Map.OrderedType) : S with type key = Ord.t diff --git a/src/index-format/granular_marshal.ml b/src/index-format/granular_marshal.ml new file mode 100644 index 000000000..67dd499fb --- /dev/null +++ b/src/index-format/granular_marshal.ml @@ -0,0 +1,191 @@ +module Cache = Hashtbl.Make (Int) + +type store = { filename : string; cache : any_link Cache.t } + +and any_link = Link : 'a link * 'a link Type.Id.t -> any_link + +and 'a link = 'a repr ref + +and 'a repr = + | Small of 'a + | Serialized of { loc : int } + | Serialized_reused of { loc : int } + | On_disk of { store : store; loc : int; schema : 'a schema } + | In_memory of 'a + | In_memory_reused of 'a + | Duplicate of 'a link + | Placeholder + +and 'a schema = iter -> 'a -> unit + +and iter = { yield : 'a. 'a link -> 'a link Type.Id.t -> 'a schema -> unit } + +let schema_no_sublinks : _ schema = fun _ _ -> () + +let link v = ref (In_memory v) + +let rec normalize lnk = + match !lnk with + | Duplicate lnk -> normalize lnk + | _ -> lnk + +let read_loc store fd loc schema = + seek_in fd loc; + let v = Marshal.from_channel fd in + let rec iter = + { yield = + (fun (type a) (lnk : a link) type_id schema -> + match !lnk with + | Small v -> + schema iter v; + lnk := In_memory v + | Serialized { loc } -> lnk := On_disk { store; loc; schema } + | Serialized_reused { loc } -> ( + match Cache.find store.cache loc with + | Link (type b) ((lnk', type_id') : b link * _) -> ( + match Type.Id.provably_equal type_id type_id' with + | Some (Equal : (a link, b link) Type.eq) -> + lnk := Duplicate (normalize lnk') + | None -> + invalid_arg + "Granular_marshal.read_loc: reuse of a different type") + | exception Not_found -> + lnk := On_disk { store; loc; schema }; + Cache.add store.cache loc (Link (lnk, type_id))) + | In_memory _ | In_memory_reused _ | On_disk _ | Duplicate _ -> () + | Placeholder -> invalid_arg "Granular_marshal.read_loc: Placeholder") + } + in + schema iter v; + v + +let last_open_store = ref None + +let () = + at_exit (fun () -> + match !last_open_store with + | None -> () + | Some (_, fd) -> close_in fd) + +let force_open_store store = + let fd = open_in_bin store.filename in + last_open_store := Some (store, fd); + fd + +let open_store store = + match !last_open_store with + | Some (store', fd) when store == store' -> fd + | Some (_, fd) -> + close_in fd; + force_open_store store + | None -> force_open_store store + +let fetch_loc store loc schema = + let fd = open_store store in + let v = read_loc store fd loc schema in + v + +let rec fetch lnk = + match !lnk with + | In_memory v | In_memory_reused v -> v + | Serialized _ | Serialized_reused _ | Small _ -> + invalid_arg "Granular_marshal.fetch: serialized" + | Placeholder -> invalid_arg "Granular_marshal.fetch: during a write" + | Duplicate original_lnk -> + let v = fetch original_lnk in + lnk := In_memory v; + v + | On_disk { store; loc; schema } -> + let v = fetch_loc store loc schema in + lnk := In_memory v; + v + +let reuse lnk = + match !lnk with + | In_memory v -> lnk := In_memory_reused v + | In_memory_reused _ -> () + | _ -> invalid_arg "Granular_marshal.reuse: not in memory" + +let cache (type a) (module Key : Hashtbl.HashedType with type t = a) = + let module H = Hashtbl.Make (Key) in + let cache = H.create 16 in + fun (lnk : a link) -> + let key = fetch lnk in + match H.find cache key with + | original_lnk -> + assert (original_lnk != lnk); + reuse original_lnk; + lnk := Duplicate original_lnk + | exception Not_found -> H.add cache key lnk + +let ptr_size = 8 + +let binstring_of_int v = + String.init ptr_size (fun i -> Char.chr ((v lsr i lsl 3) land 255)) + +let int_of_binstring s = + Array.fold_right + (fun v acc -> (acc lsl 8) + v) + (Array.init ptr_size (fun i -> Char.code s.[i])) + 0 + +let write ?(flags = []) fd root_schema root_value = + let pt_root = pos_out fd in + output_string fd (String.make ptr_size '\000'); + let rec iter size ~placeholders ~restore = + { yield = + (fun (type a) (lnk : a link) _type_id (schema : a schema) : unit -> + match !lnk with + | Serialized _ | Serialized_reused _ | Small _ -> () + | Placeholder -> failwith "big nono" + | In_memory_reused v -> write_child_reused lnk schema v + | Duplicate original_lnk -> + (match !original_lnk with + | Serialized_reused _ -> () + | In_memory_reused v -> write_child_reused original_lnk schema v + | _ -> failwith "Granular_marshal.write: duplicate not reused"); + lnk := !original_lnk + | In_memory v -> write_child lnk schema v size ~placeholders ~restore + | On_disk _ -> + write_child lnk schema (fetch lnk) size ~placeholders ~restore) + } + and write_child : type a. a link -> a schema -> a -> _ = + fun lnk schema v size ~placeholders ~restore -> + let v_size = write_children schema v in + if v_size > 1024 then ( + lnk := Serialized { loc = pos_out fd }; + Marshal.to_channel fd v flags) + else ( + size := !size + v_size; + placeholders := (fun () -> lnk := Placeholder) :: !placeholders; + restore := (fun () -> lnk := Small v) :: !restore) + and write_children : type a. a schema -> a -> int = + fun schema v -> + let children_size = ref 0 in + let placeholders = ref [] in + let restore = ref [] in + schema (iter children_size ~placeholders ~restore) v; + List.iter (fun placehold -> placehold ()) !placeholders; + let v_size = Obj.(reachable_words (repr v)) in + List.iter (fun restore -> restore ()) !restore; + !children_size + v_size + and write_child_reused : type a. a link -> a schema -> a -> _ = + fun lnk schema v -> + let children_size = ref 0 in + let placeholders = ref [] in + let restore = ref [] in + schema (iter children_size ~placeholders ~restore) v; + lnk := Serialized_reused { loc = pos_out fd }; + Marshal.to_channel fd v flags + in + let _ : int = write_children root_schema root_value in + let root_loc = pos_out fd in + Marshal.to_channel fd root_value flags; + seek_out fd pt_root; + output_string fd (binstring_of_int root_loc) + +let read filename fd root_schema = + let store = { filename; cache = Cache.create 0 } in + let root_loc = int_of_binstring (really_input_string fd 8) in + let root_value = read_loc store fd root_loc root_schema in + root_value diff --git a/src/index-format/granular_marshal.mli b/src/index-format/granular_marshal.mli new file mode 100644 index 000000000..c8c543a94 --- /dev/null +++ b/src/index-format/granular_marshal.mli @@ -0,0 +1,65 @@ +(** A pointer to an ['a] value, either residing in memory or on disk. *) +type 'a link + +(** [link v] returns a new link to the in-memory value [v]. *) +val link : 'a -> 'a link + +(** [reuse lnk] marks the link as being used more than once, to ensure proper + serialization of DAGs. *) +val reuse : 'a link -> unit + +(** [cache (module Hash)] returns a function to de-duplicate links which share + the same value, resulting in a compressed file. *) +val cache : 'a. (module Hashtbl.HashedType with type t = 'a) -> 'a link -> unit + +(** [fetch lnk] returns the value pointed by the link [lnk]. + + We of course have [fetch (link v) = v] and [link (fetch lnk) = lnk]. *) +val fetch : 'a link -> 'a + +(** For Merlin we can't depend on a PPX or external dependencies, + so we require a user-defined {!schema} to describe where the links can be + found. This is just an iter traversal over the values, recursively + yielding on any reachable link. Since links can point to values themselves + containing links, recursion is delayed by asking for the schema of each + child. + + For example, the following type has the following schema: + + {[ + type t = { first : string link ; second : int link list link } + + let type_first : string link Type.Id.t = Type.Id.make () + let type_second : int link list link Type.Id.t = Type.Id.make () + let type_v : int link Type.Id.t = Type.Id.make () + + let schema : t schema = fun iter t -> + iter.yield t.first type_first schema_no_sublinks ; + iter.yield t.second type_second @@ fun iter lst -> + List.iter (fun v -> iter.yield v type_v schema_no_sublinks) lst + ]} + + where {!schema_no_sublinks} indicates that the yielded value contains + no reachable links. *) + +(** A function to iter on every {!link} reachable in the value ['a]. *) +type 'a schema = iter -> 'a -> unit + +(** A callback to signal the reachable links and the schema of their pointed + sub-value. Since a value can contain multiple links each pointing to + different types of values, the callback is polymorphic. *) +and iter = { yield : 'a. 'a link -> 'a link Type.Id.t -> 'a schema -> unit } + +(** A schema usable when the ['a] value does not contain any links. *) +val schema_no_sublinks : 'a schema + +(** [write oc schema value] writes the [value] in the output channel [oc], + creating unmarshalling boundaries on every link in [value] specified + by the [schema]. *) +val write : + ?flags:Marshal.extern_flags list -> out_channel -> 'a schema -> 'a -> unit + +(** [read ic schema] reads the value marshalled in the input channel [ic], + stopping the unmarshalling on every link boundary indicated by the [schema]. + It returns the root [value] read. *) +val read : string -> in_channel -> 'a schema -> 'a diff --git a/src/index-format/granular_set.ml b/src/index-format/granular_set.ml new file mode 100644 index 000000000..7af3c62ba --- /dev/null +++ b/src/index-format/granular_set.ml @@ -0,0 +1,278 @@ +(**************************************************************************) +(* *) +(* OCaml *) +(* *) +(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *) +(* *) +(* Copyright 1996 Institut National de Recherche en Informatique et *) +(* en Automatique. *) +(* *) +(* All rights reserved. This file is distributed under the terms of *) +(* the GNU Lesser General Public License version 2.1, with the *) +(* special exception on linking described in the file LICENSE. *) +(* *) +(**************************************************************************) + +open Granular_marshal + +module type S = sig + type elt + type t + + val empty : t + val add : elt -> t -> t + val is_empty : t -> bool + val mem : elt -> t -> bool + val singleton : elt -> t + val remove : elt -> t -> t + val filter : (elt -> bool) -> t -> t + val union : t -> t -> t + val map : (elt -> elt) -> t -> t + val iter : (elt -> unit) -> t -> unit + val cardinal : t -> int + val elements : t -> elt list + val schema : + Granular_marshal.iter -> (Granular_marshal.iter -> elt -> unit) -> t -> unit +end + +module Make (Ord : Set.OrderedType) = struct + type elt = Ord.t + + type t = s link + and s = Empty | Node of { l : t; v : elt; r : t; h : int } + + let height t = + match fetch t with + | Empty -> 0 + | Node { h; _ } -> h + + let create (l : t) v (r : t) : t = + let hl = + match fetch l with + | Empty -> 0 + | Node { h; _ } -> h + in + let hr = + match fetch r with + | Empty -> 0 + | Node { h; _ } -> h + in + link (Node { l; v; r; h = (if hl >= hr then hl + 1 else hr + 1) }) + + let bal (l : t) v (r : t) = + let hl = + match fetch l with + | Empty -> 0 + | Node { h; _ } -> h + in + let hr = + match fetch r with + | Empty -> 0 + | Node { h; _ } -> h + in + if hl > hr + 2 then begin + match fetch l with + | Empty -> invalid_arg "Set.bal" + | Node { l = ll; v = lv; r = lr; _ } -> + if height ll >= height lr then create ll lv (create lr v r) + else begin + match fetch lr with + | Empty -> invalid_arg "Set.bal" + | Node { l = lrl; v = lrv; r = lrr; _ } -> + create (create ll lv lrl) lrv (create lrr v r) + end + end + else if hr > hl + 2 then begin + match fetch r with + | Empty -> invalid_arg "Set.bal" + | Node { l = rl; v = rv; r = rr; _ } -> + if height rr >= height rl then create (create l v rl) rv rr + else begin + match fetch rl with + | Empty -> invalid_arg "Set.bal" + | Node { l = rll; v = rlv; r = rlr; _ } -> + create (create l v rll) rlv (create rlr rv rr) + end + end + else link (Node { l; v; r; h = (if hl >= hr then hl + 1 else hr + 1) }) + + let empty = link Empty + + let rec add x t : t = + match fetch t with + | Empty -> link (Node { l = link Empty; v = x; r = link Empty; h = 1 }) + | Node { l; v; r; _ } as t -> + let c = Ord.compare x v in + if c = 0 then link t + else if c < 0 then + let ll = add x l in + if l == ll then link t else bal ll v r + else + let rr = add x r in + if r == rr then link t else bal l v rr + + let singleton x = link (Node { l = link Empty; v = x; r = link Empty; h = 1 }) + + let rec min_elt t = + match fetch t with + | Empty -> raise Not_found + | Node { l; v; _ } when fetch l = Empty -> v + | Node { l; _ } -> min_elt l + + let rec remove_min_elt t = + match fetch t with + | Empty -> invalid_arg "Set.remove_min_elt" + | Node { l; r; _ } when fetch l = Empty -> r + | Node { l; v; r; _ } -> bal (remove_min_elt l) v r + + let merge t1 t2 = + match (fetch t1, fetch t2) with + | Empty, _ -> t2 + | _, Empty -> t1 + | _, _ -> bal t1 (min_elt t2) (remove_min_elt t2) + + let is_empty t = + match fetch t with + | Empty -> true + | _ -> false + + let rec mem x t = + match fetch t with + | Empty -> false + | Node { l; v; r; _ } -> + let c = Ord.compare x v in + c = 0 || mem x (if c < 0 then l else r) + + let rec remove x t = + match fetch t with + | Empty -> link Empty + | Node { l; v; r; _ } as t -> + let c = Ord.compare x v in + if c = 0 then merge l r + else if c < 0 then + let ll = remove x l in + if l == ll then link t else bal ll v r + else + let rr = remove x r in + if r == rr then link t else bal l v rr + + let rec add_min_element x t = + match fetch t with + | Empty -> singleton x + | Node { l; v; r; _ } -> bal (add_min_element x l) v r + + let rec add_max_element x t = + match fetch t with + | Empty -> singleton x + | Node { l; v; r; _ } -> bal l v (add_max_element x r) + + let rec join (l : t) v (r : t) = + match (fetch l, fetch r) with + | Empty, _ -> add_min_element v r + | _, Empty -> add_max_element v l + | ( Node { l = ll; v = lv; r = lr; h = lh }, + Node { l = rl; v = rv; r = rr; h = rh } ) -> + if lh > rh + 2 then bal ll lv (join lr v r) + else if rh > lh + 2 then bal (join l v rl) rv rr + else create l v r + + let rec max_elt t = + match fetch t with + | Empty -> raise Not_found + | Node { v; r; _ } when fetch r = Empty -> v + | Node { r; _ } -> max_elt r + + let concat t1 t2 = + match (fetch t1, fetch t2) with + | Empty, _ -> t2 + | _, Empty -> t1 + | _, _ -> join t1 (min_elt t2) (remove_min_elt t2) + + let rec split x t = + match fetch t with + | Empty -> (link Empty, false, link Empty) + | Node { l; v; r; _ } -> + let c = Ord.compare x v in + if c = 0 then (l, true, r) + else if c < 0 then + let ll, pres, rl = split x l in + (ll, pres, join rl v r) + else + let lr, pres, rr = split x r in + (join l v lr, pres, rr) + + let rec union t1 t2 = + match (fetch t1, fetch t2) with + | Empty, _ -> t2 + | _, Empty -> t1 + | ( Node { l = l1; v = v1; r = r1; h = h1 }, + Node { l = l2; v = v2; r = r2; h = h2 } ) -> + if h1 >= h2 then + if h2 = 1 then add v2 t1 + else begin + let l2, _, r2 = split v1 t2 in + join (union l1 l2) v1 (union r1 r2) + end + else if h1 = 1 then add v1 t2 + else begin + let l1, _, r1 = split v2 t1 in + join (union l1 l2) v2 (union r1 r2) + end + + let rec filter p t = + match fetch t with + | Empty -> link Empty + | Node { l; v; r; _ } as t -> + let l' = filter p l in + let pv = p v in + let r' = filter p r in + if pv then if l == l' && r == r' then link t else join l' v r' + else concat l' r' + + let rec cardinal t = + match fetch t with + | Empty -> 0 + | Node { l; r; _ } -> cardinal l + 1 + cardinal r + + let rec elements_aux accu t = + match fetch t with + | Empty -> accu + | Node { l; v; r; _ } -> elements_aux (v :: elements_aux accu r) l + + let elements s = elements_aux [] s + + let try_join l v r = + if + (fetch l = Empty || Ord.compare (max_elt l) v < 0) + && (fetch r = Empty || Ord.compare v (min_elt r) < 0) + then join l v r + else union l (add v r) + + let rec map f t = + match fetch t with + | Empty -> link Empty + | Node { l; v; r; _ } as t -> + let l' = map f l in + let v' = f v in + let r' = map f r in + if l == l' && v == v' && r == r' then link t else try_join l' v' r' + + let rec iter f t = + match fetch t with + | Empty -> () + | Node { l; v; r; _ } -> + iter f l; + f v; + iter f r + + let type_id = Type.Id.make () + + let rec schema iter f m = + iter.yield m type_id @@ fun iter tree -> + match tree with + | Empty -> () + | Node { l; v; r; _ } -> + schema iter f l; + f iter v; + schema iter f r +end diff --git a/src/index-format/granular_set.mli b/src/index-format/granular_set.mli new file mode 100644 index 000000000..c0565d3cf --- /dev/null +++ b/src/index-format/granular_set.mli @@ -0,0 +1,36 @@ +(**************************************************************************) +(* *) +(* OCaml *) +(* *) +(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *) +(* *) +(* Copyright 1996 Institut National de Recherche en Informatique et *) +(* en Automatique. *) +(* *) +(* All rights reserved. This file is distributed under the terms of *) +(* the GNU Lesser General Public License version 2.1, with the *) +(* special exception on linking described in the file LICENSE. *) +(* *) +(**************************************************************************) + +module type S = sig + type elt + type t + + val empty : t + val add : elt -> t -> t + val is_empty : t -> bool + val mem : elt -> t -> bool + val singleton : elt -> t + val remove : elt -> t -> t + val filter : (elt -> bool) -> t -> t + val union : t -> t -> t + val map : (elt -> elt) -> t -> t + val iter : (elt -> unit) -> t -> unit + val cardinal : t -> int + val elements : t -> elt list + val schema : + Granular_marshal.iter -> (Granular_marshal.iter -> elt -> unit) -> t -> unit +end + +module Make (Ord : Set.OrderedType) : S with type elt = Ord.t diff --git a/src/index-format/index_format.ml b/src/index-format/index_format.ml index bc2906fbc..e5a3a72eb 100644 --- a/src/index-format/index_format.ml +++ b/src/index-format/index_format.ml @@ -1,25 +1,28 @@ exception Not_an_index of string -module Lid : Set.OrderedType with type t = Longident.t Location.loc = struct - type t = Longident.t Location.loc - - let compare_pos (p1 : Lexing.position) (p2 : Lexing.position) = - let p1f, p2f = Filename.(basename p1.pos_fname, basename p2.pos_fname) in - match String.compare p1f p2f with - | 0 -> Int.compare p1.pos_cnum p2.pos_cnum - | n -> n - - let compare (t1 : t) (t2 : t) = - match compare_pos t1.loc.loc_start t2.loc.loc_start with - | 0 -> compare_pos t1.loc.loc_end t2.loc.loc_end - | n -> n -end - -module Lid_set = Set.Make (Lid) -module Uid_map = Shape.Uid.Map +module Lid = Lid +module Lid_set = Granular_set.Make (Lid) +module Uid_map = Granular_map.Make (Shape.Uid) module Stats = Map.Make (String) module Uid_set = Shape.Uid.Set +module Union_find = struct + type t = Uid_set.t Union_find.element Granular_marshal.link + + let make v = Granular_marshal.link (Union_find.make v) + + let get t = Union_find.get (Granular_marshal.fetch t) + + let union a b = + Granular_marshal.( + link (Union_find.union ~f:Uid_set.union (fetch a) (fetch b))) + + let type_id : t Type.Id.t = Type.Id.make () + + let schema { Granular_marshal.yield } t = + yield t type_id Granular_marshal.schema_no_sublinks +end + let add map uid locs = Uid_map.update uid (function @@ -35,27 +38,59 @@ type index = cu_shape : (Compilation_unit.t, Shape.t) Hashtbl.t; stats : stat Stats.t; root_directory : string option; - related_uids : Uid_set.t Union_find.element Uid_map.t + related_uids : Union_find.t Uid_map.t } +let lidset_schema iter lidset = Lid_set.schema iter Lid.schema lidset + +let type_setmap : Lid_set.t Uid_map.t Type.Id.t = Type.Id.make () +let type_ufmap : Union_find.t Uid_map.t Type.Id.t = Type.Id.make () + +let index_schema (iter : Granular_marshal.iter) index = + Uid_map.schema type_setmap iter + (fun iter _ v -> lidset_schema iter v) + index.defs; + Uid_map.schema type_setmap iter + (fun iter _ v -> lidset_schema iter v) + index.approximated; + Uid_map.schema type_ufmap iter + (fun iter _ v -> Union_find.schema iter v) + index.related_uids + +let compress index = + let cache = Lid.cache () in + let compress_map_set = + Uid_map.iter (fun _ -> Lid_set.iter (Lid.deduplicate cache)) + in + compress_map_set index.defs; + compress_map_set index.approximated; + let related_uids = + Uid_map.map + (fun set -> + let uid = Uid_set.min_elt (Union_find.get set) in + let reference_set = Uid_map.find uid index.related_uids in + Granular_marshal.reuse reference_set; + reference_set) + index.related_uids + in + { index with related_uids } + +let pp_lidset fmt locs = + Format.pp_print_list + ~pp_sep:(fun fmt () -> Format.fprintf fmt ";@;") + Lid.pp fmt (Lid_set.elements locs) + let pp_partials (fmt : Format.formatter) (partials : Lid_set.t Uid_map.t) = Format.fprintf fmt "{@["; Uid_map.iter (fun uid locs -> Format.fprintf fmt "@[uid: %a; locs:@ @[%a@]@]@;" - Shape.Uid.print uid - (Format.pp_print_list - ~pp_sep:(fun fmt () -> Format.fprintf fmt ";@;") - (fun fmt { Location.txt; loc } -> - Format.fprintf fmt "%S: %a" - (try Longident.flatten txt |> String.concat "." with _ -> "") - Location.print_loc loc)) - (Lid_set.elements locs)) + Shape.Uid.print uid pp_lidset locs) partials; Format.fprintf fmt "@]}" let pp_related_uids (fmt : Format.formatter) - (related_uids : Uid_set.t Union_find.element Uid_map.t) = + (related_uids : Union_find.t Uid_map.t) = let rec gather acc map = match Uid_map.choose_opt map with | Some (_key, union) -> @@ -79,14 +114,7 @@ let pp (fmt : Format.formatter) pl = Uid_map.iter (fun uid locs -> Format.fprintf fmt "@[uid: %a; locs:@ @[%a@]@]@;" - Shape.Uid.print uid - (Format.pp_print_list - ~pp_sep:(fun fmt () -> Format.fprintf fmt ";@;") - (fun fmt { Location.txt; loc } -> - Format.fprintf fmt "%S: %a" - (try Longident.flatten txt |> String.concat "." with _ -> "") - Location.print_loc loc)) - (Lid_set.elements locs)) + Shape.Uid.print uid pp_lidset locs) pl.defs; Format.fprintf fmt "@]},@ "; Format.fprintf fmt "%i approx shapes:@ @[%a@],@ " @@ -104,10 +132,11 @@ let ext = "ocaml-index" let magic_number = Config.index_magic_number let write ~file index = + let index = compress index in Misc.output_to_file_via_temporary ~mode:[ Open_binary ] file (fun _temp_file_name oc -> output_string oc magic_number; - output_value oc (index : index)) + Granular_marshal.write oc index_schema (index : index)) type file_content = | Cmt of Cmt_format.cmt_infos @@ -132,7 +161,7 @@ let read ~file = else if String.equal !file_magic_number cms_magic_number then Cms (input_value ic : Cms_format.cms_infos) else if String.equal !file_magic_number magic_number then - Index (input_value ic : index) + Index (Granular_marshal.read file ic index_schema) else Unknown) let read_exn ~file = diff --git a/src/index-format/index_format.mli b/src/index-format/index_format.mli index a6a30775d..0e3e6f977 100644 --- a/src/index-format/index_format.mli +++ b/src/index-format/index_format.mli @@ -3,11 +3,22 @@ exception Not_an_index of string val ext : string val magic_number : string -module Lid : Set.OrderedType with type t = Longident.t Location.loc -module Lid_set : Set.S with type elt = Lid.t +module Lid : sig + include Set.OrderedType + val of_lid : Longident.t Location.loc -> t + val to_lid : t -> Longident.t Location.loc +end +module Lid_set : Granular_set.S with type elt = Lid.t module Stats : Map.S with type key = String.t -module Uid_map = Shape.Uid.Map module Uid_set = Shape.Uid.Set +module Uid_map : Granular_map.S with type key = Shape.Uid.t +module Union_find : sig + type t + + val make : Uid_set.t -> t + val get : t -> Uid_set.t + val union : t -> t -> t +end type stat = { mtime : float; size : int; source_digest : string option } @@ -17,7 +28,7 @@ type index = cu_shape : (Compilation_unit.t, Shape.t) Hashtbl.t; stats : stat Stats.t; root_directory : string option; - related_uids : Uid_set.t Union_find.element Uid_map.t + related_uids : Union_find.t Uid_map.t } val pp : Format.formatter -> index -> unit @@ -34,4 +45,5 @@ type file_content = val write : file:string -> index -> unit val read : file:string -> file_content + val read_exn : file:string -> index diff --git a/src/index-format/lid.ml b/src/index-format/lid.ml new file mode 100644 index 000000000..a21da307b --- /dev/null +++ b/src/index-format/lid.ml @@ -0,0 +1,71 @@ +module G = Granular_marshal + +type pos = { lnum : int; cnum : int; bol : int } +let pos_of_loc { Lexing.pos_lnum = lnum; pos_cnum = cnum; pos_bol = bol; _ } = + { lnum; cnum; bol } + +let loc_of_pos pos_fname { lnum; cnum; bol } = + { Lexing.pos_lnum = lnum; pos_cnum = cnum; pos_bol = bol; pos_fname } + +type t = + { longident : Longident.t G.link; + filename : string G.link; + start : pos; + stop : pos; + ghost : bool + } + +let of_lid { Location.txt; loc = { loc_start; loc_end; loc_ghost } } = + { filename = G.link loc_start.pos_fname; + longident = G.link txt; + ghost = loc_ghost; + start = pos_of_loc loc_start; + stop = pos_of_loc loc_end + } + +let to_lid { filename; longident; ghost; start; stop } = + let filename = G.fetch filename in + let loc_start = loc_of_pos filename start in + let loc_end = loc_of_pos filename stop in + { Location.txt = G.fetch longident; + loc = { loc_start; loc_end; loc_ghost = ghost } + } + +let pp fmt t = + let { Location.txt; loc } = to_lid t in + Format.fprintf fmt "%S: %a" + (try Longident.flatten txt |> String.concat "." with _ -> "") + Location.print_loc loc + +let compare_pos p1 p2 = Int.compare p1.cnum p2.cnum +let compare_filename t1 t2 = + String.compare + (Filename.basename (G.fetch t1.filename)) + (Filename.basename (G.fetch t2.filename)) + +let compare t1 t2 = + match compare_filename t1 t2 with + | 0 -> ( + match compare_pos t1.start t2.start with + | 0 -> compare_pos t1.stop t2.stop + | c -> c) + | c -> c + +let type_string : string G.link Type.Id.t = Type.Id.make () +let type_longident : Longident.t G.link Type.Id.t = Type.Id.make () + +let schema iter t = + iter.G.yield t.filename type_string G.schema_no_sublinks; + iter.G.yield t.longident type_longident G.schema_no_sublinks + +module Li = struct + include Longident + let equal = ( = ) + let hash = Hashtbl.hash +end + +let cache () = G.(cache (module String), cache (module Li)) + +let deduplicate (cache_filename, cache_lid) t = + cache_filename t.filename; + cache_lid t.longident diff --git a/src/ocaml-index/lib/dune b/src/ocaml-index/lib/dune index f21f905bd..a1930d31e 100644 --- a/src/ocaml-index/lib/dune +++ b/src/ocaml-index/lib/dune @@ -1,17 +1,6 @@ (library (name lib) - (libraries - ocaml_typing - ocaml_parsing - ocaml_utils - merlin_utils - merlin_analysis - merlin_index_format) - (flags - :standard - -open Ocaml_typing - -open Ocaml_parsing - -open Ocaml_utils - -open Merlin_utils - -open Merlin_analysis - -open Merlin_index_format)) + (libraries ocaml_typing ocaml_parsing ocaml_utils merlin_utils + merlin_analysis merlin_index_format unix) + (flags :standard -open Ocaml_typing -open Ocaml_parsing -open Ocaml_utils + -open Merlin_utils -open Merlin_analysis -open Merlin_index_format)) diff --git a/src/ocaml-index/lib/index.ml b/src/ocaml-index/lib/index.ml index d73e3ae0d..b253084f0 100644 --- a/src/ocaml-index/lib/index.ml +++ b/src/ocaml-index/lib/index.ml @@ -20,12 +20,11 @@ let add_root ~root (lid : Longident.t Location.loc) = } let merge m m' = - Shape.Uid.Map.union - (fun _uid locs locs' -> Some (Lid_set.union locs locs')) - m m' + Uid_map.union (fun _uid locs locs' -> Some (Lid_set.union locs locs')) m m' let add_one uid lid map = - Shape.Uid.Map.update uid + let lid = Lid.of_lid lid in + Uid_map.update uid (function | None -> Some (Lid_set.singleton lid) | Some set -> Some (Lid_set.add lid set)) @@ -156,8 +155,7 @@ let index_of_artifact ~into ~root ~rewrite_root ~build_path let map_update uid = Uid_map.update uid (function | None -> Some union - | Some union' -> - Some (Union_find.union ~f:Uid_set.union union' union)) + | Some union' -> Some (Union_find.union union' union)) in acc |> map_update uid1 |> map_update uid2) into.related_uids cmt_declaration_dependencies @@ -173,12 +171,12 @@ let index_of_artifact ~into ~root ~rewrite_root ~build_path let shape_of_artifact ~impl_shape ~modname = let cu_shape = Hashtbl.create 1 in Option.iter (Hashtbl.add cu_shape modname) impl_shape; - { defs = Shape.Uid.Map.empty; - approximated = Shape.Uid.Map.empty; + { defs = Uid_map.empty (); + approximated = Uid_map.empty (); cu_shape; stats = Stats.empty; root_directory = None; - related_uids = Uid_map.empty + related_uids = Uid_map.empty () } let shape_of_cmt { Cmt_format.cmt_impl_shape; cmt_modname; _ } = @@ -243,7 +241,7 @@ let merge_index ~store_shapes ~into index = let stats = Stats.union (fun _ f1 _f2 -> Some f1) into.stats index.stats in let related_uids = Uid_map.union - (fun _ a b -> Some (Union_find.union ~f:Uid_set.union a b)) + (fun _ a b -> Some (Union_find.union a b)) index.related_uids into.related_uids in if store_shapes then @@ -254,12 +252,12 @@ let from_files ~store_shapes ~output_file ~root ~rewrite_root ~build_path ~do_not_use_cmt_loadpath files = Log.debug "Debug log is enabled"; let initial_index = - { defs = Shape.Uid.Map.empty; - approximated = Shape.Uid.Map.empty; + { defs = Uid_map.empty (); + approximated = Uid_map.empty (); cu_shape = Hashtbl.create 64; stats = Stats.empty; root_directory = root; - related_uids = Uid_map.empty + related_uids = Uid_map.empty () } in let final_index = @@ -298,12 +296,12 @@ let from_files ~store_shapes ~output_file ~root ~rewrite_root ~build_path let gather_shapes ~output_file files = let initial_index = - { defs = Shape.Uid.Map.empty; - approximated = Shape.Uid.Map.empty; + { defs = Uid_map.empty (); + approximated = Uid_map.empty (); cu_shape = Hashtbl.create 64; stats = Stats.empty; root_directory = None; - related_uids = Uid_map.empty + related_uids = Uid_map.empty () } in let final_index = diff --git a/src/ocaml/utils/directory_content_cache.ml b/src/ocaml/utils/directory_content_cache.ml index 7b5ee9ebc..637c7cf10 100644 --- a/src/ocaml/utils/directory_content_cache.ml +++ b/src/ocaml/utils/directory_content_cache.ml @@ -11,4 +11,3 @@ include File_cache.Make (struct with Sys_error _ -> [||] end) - diff --git a/src/utils/file_cache.ml b/src/utils/file_cache.ml index ad464082a..d8c095628 100644 --- a/src/utils/file_cache.ml +++ b/src/utils/file_cache.ml @@ -79,12 +79,13 @@ struct let fid = File_id.get filename in match Hashtbl.find cache filename with | exception Not_found -> false - | fid', latest_use, _ -> + | fid', latest_use, _file -> if File_id.check fid fid' then begin latest_use := Unix.time (); true end else begin + Hashtbl.remove cache filename; false end diff --git a/tests/test-dirs/occurrences/project-wide/union.t b/tests/test-dirs/occurrences/project-wide/union.t new file mode 100644 index 000000000..a8bcefe81 --- /dev/null +++ b/tests/test-dirs/occurrences/project-wide/union.t @@ -0,0 +1,54 @@ +Serialization of `related_uids` requires special cares as the union-find +algorithm relies on deserialization preserving physical identity (for mutations +to work). The issue manifested on sufficiently large indexes (if small, then +the marshal wouldn't be granular): + + $ NB=1024 + $ for i in $(seq 1 $NB); do echo "let x$i = 0"; done >test.ml + $ for i in $(seq 1 $NB); do echo "val x$i : int"; done >test.mli + $ $OCAMLC -bin-annot -bin-annot-occurrences -c test.mli test.ml + +A signature containing the same symbols: + + $ echo "module type S = sig\n$(cat test.mli)\nend" >sig.ml + $ echo "module type S = sig\n$(cat test.mli)\nend" >sig.mli + $ $OCAMLC -bin-annot -bin-annot-occurrences -c sig.mli sig.ml + +At this point `ŧest` and `sig` are unrelated. We'll later force their unification with: + + $ cat >both.ml < module M = (Test : Sig.S) + > EOF + $ $OCAMLC -bin-annot -bin-annot-occurrences -c both.ml + + $ cat > .merlin << EOF + > INDEX project.ocaml-index + > SOURCE_ROOT . + > EOF + +First compute the index for `test` and `sig`: + + $ ocaml-index aggregate test.cmti test.cmt sig.cmti sig.cmt --root . --rewrite-root + $ mv project.ocaml-index test_sig.ocaml-index + +Then for `both`: + + $ ocaml-index aggregate both.cmt --root . --rewrite-root + +Merge everything together, which reveals the relation between `test` and `sig` uids: + + $ ocaml-index aggregate test_sig.ocaml-index project.ocaml-index + +All files should be listed on queries: (except `both.ml`) + + $ $MERLIN single occurrences -scope renaming -identifier-at 1:5 -filename test.ml < test.ml | jq '.value[] | .file' + "$TESTCASE_ROOT/test.ml" + "$TESTCASE_ROOT/sig.ml" + "$TESTCASE_ROOT/sig.mli" + "$TESTCASE_ROOT/test.mli" + + $ $MERLIN single occurrences -scope renaming -identifier-at 50:5 -filename test.ml < test.ml | jq '.value[] | .file' + "$TESTCASE_ROOT/test.ml" + "$TESTCASE_ROOT/sig.ml" + "$TESTCASE_ROOT/sig.mli" + "$TESTCASE_ROOT/test.mli" From 08481e40a1cd1c8987497f12e2a62955d5e5607c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ulysse=20G=C3=A9rard?= Date: Thu, 10 Apr 2025 09:09:49 -0400 Subject: [PATCH 6/7] Disable new upstream test relying on Dune --- tests/test-dirs/occurrences/project-wide/for-renaming/dune | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 tests/test-dirs/occurrences/project-wide/for-renaming/dune diff --git a/tests/test-dirs/occurrences/project-wide/for-renaming/dune b/tests/test-dirs/occurrences/project-wide/for-renaming/dune new file mode 100644 index 000000000..c463368e8 --- /dev/null +++ b/tests/test-dirs/occurrences/project-wide/for-renaming/dune @@ -0,0 +1,6 @@ +; Jane Street disabled tests -- they use dune. +; Revisit when dune can run the flambda-backend compiler. + +(cram + (enabled_if false) + (applies_to r-with-functors)) From 6e941528589c1032a4b1512bed701efb5374fc1f Mon Sep 17 00:00:00 2001 From: Ulysse <5031221+voodoos@users.noreply.github.com> Date: Wed, 16 Apr 2025 17:17:37 -0400 Subject: [PATCH 7/7] Downstream: Fixes for renaming(ocaml/merlin#1924) --- CHANGES.md | 1 + src/analysis/locate.ml | 11 ++++++--- src/index-format/lid.ml | 4 +--- .../for-renaming/r-modules-and-types.t | 12 ++++++++++ .../for-renaming/r-with-functors.t/run.t | 10 ++++---- .../occurrences/project-wide/mli-vs-ml.t | 24 +++++++++++++++++++ .../occurrences/project-wide/prefix.t/run.t | 24 +++++++++++++++++++ .../occurrences/project-wide/pwo-basic.t | 12 ++++++++++ .../occurrences/project-wide/pwo-ml-gen.t | 2 +- .../occurrences/project-wide/stale-index.t | 24 +++++++++---------- 10 files changed, 100 insertions(+), 24 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index dcb47870e..218381c73 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -9,6 +9,7 @@ unreleased - Perform less merges in the indexer (#1881) - Add initial support for project-wide renaming: occurrences can now return all usages of all related definitions. (#1877) + - Fix issues with ident validation and Lid comparison for occurrences (#1924) + ocaml-index - Bump magic number after index file format change. Index can now be read lazilly (#1886) diff --git a/src/analysis/locate.ml b/src/analysis/locate.ml index 13d688bd9..6e01b258b 100644 --- a/src/analysis/locate.ml +++ b/src/analysis/locate.ml @@ -922,7 +922,7 @@ let rec uid_of_result ~traverse_aliases = function | Approximated _ | Unresolved _ | Internal_error_missing_uid -> (None, true) (** This is the main function here *) -let from_path ~config ~env ~local_defs ~decl path = +let from_path ~config ~env ~local_defs ~decl ?ident:_ path = let title = "from_path" in let unalias (decl : Env_lookup.item) = if not config.traverse_aliases then (path, decl.uid) @@ -969,9 +969,12 @@ let from_path ~config ~env ~local_defs ~decl path = in (* Step 2: Uid => Location *) let loc = + let ident = + (* TODO it might not be useful to check the ident without impl_uid *) + Path.last path + in match impl_uid with | Some impl_uid -> - let ident = Path.last path in find_loc_of_uid ~config ~local_defs ~ident ~fallback:uid impl_uid | None -> find_loc_of_uid ~config ~local_defs uid in @@ -1009,7 +1012,9 @@ let from_longident ~config ~env ~local_defs nss ident = in match Env_lookup.by_longident nss ident env with | None -> `Not_in_env str_ident - | Some (path, decl) -> from_path ~config ~env ~local_defs ~decl path + | Some (path, decl) -> + let ident = Longident.last ident in + from_path ~config ~env ~local_defs ~decl ~ident path let from_path ~config ~env ~local_defs ~namespace path = File_switching.reset (); diff --git a/src/index-format/lid.ml b/src/index-format/lid.ml index a21da307b..91bfafe35 100644 --- a/src/index-format/lid.ml +++ b/src/index-format/lid.ml @@ -39,9 +39,7 @@ let pp fmt t = let compare_pos p1 p2 = Int.compare p1.cnum p2.cnum let compare_filename t1 t2 = - String.compare - (Filename.basename (G.fetch t1.filename)) - (Filename.basename (G.fetch t2.filename)) + String.compare (G.fetch t1.filename) (G.fetch t2.filename) let compare t1 t2 = match compare_filename t1 t2 with diff --git a/tests/test-dirs/occurrences/project-wide/for-renaming/r-modules-and-types.t b/tests/test-dirs/occurrences/project-wide/for-renaming/r-modules-and-types.t index b25fafd30..2c3d524e4 100644 --- a/tests/test-dirs/occurrences/project-wide/for-renaming/r-modules-and-types.t +++ b/tests/test-dirs/occurrences/project-wide/for-renaming/r-modules-and-types.t @@ -52,6 +52,18 @@ "col": 7 } }, + { + "file": "$TESTCASE_ROOT/main.ml", + "start": { + "line": 5, + "col": 25 + }, + "end": { + "line": 5, + "col": 26 + }, + "stale": false + }, { "file": "$TESTCASE_ROOT/lib.ml", "start": { diff --git a/tests/test-dirs/occurrences/project-wide/for-renaming/r-with-functors.t/run.t b/tests/test-dirs/occurrences/project-wide/for-renaming/r-with-functors.t/run.t index d28346bf3..dee8c73d2 100644 --- a/tests/test-dirs/occurrences/project-wide/for-renaming/r-with-functors.t/run.t +++ b/tests/test-dirs/occurrences/project-wide/for-renaming/r-with-functors.t/run.t @@ -12,6 +12,11 @@ We expect 2 occurrences in func.ml, 1 in func.mli and 2 in main.ml "line": 1, "col": 22 } + "$TESTCASE_ROOT/main.ml" + { + "line": 4, + "col": 16 + } "$TESTCASE_ROOT/func.ml" { "line": 1, @@ -27,8 +32,3 @@ We expect 2 occurrences in func.ml, 1 in func.mli and 2 in main.ml "line": 1, "col": 24 } - "$TESTCASE_ROOT/main.ml" - { - "line": 4, - "col": 16 - } diff --git a/tests/test-dirs/occurrences/project-wide/mli-vs-ml.t b/tests/test-dirs/occurrences/project-wide/mli-vs-ml.t index 034e7bfcd..8d041fcf7 100644 --- a/tests/test-dirs/occurrences/project-wide/mli-vs-ml.t +++ b/tests/test-dirs/occurrences/project-wide/mli-vs-ml.t @@ -47,6 +47,18 @@ the interface and the implementation. }, "stale": false }, + { + "file": "$TESTCASE_ROOT/main.mli", + "start": { + "line": 2, + "col": 8 + }, + "end": { + "line": 2, + "col": 9 + }, + "stale": false + }, { "file": "$TESTCASE_ROOT/main.ml", "start": { @@ -104,6 +116,18 @@ Same when the cursor is at the origin: }, "stale": false }, + { + "file": "$TESTCASE_ROOT/main.mli", + "start": { + "line": 2, + "col": 8 + }, + "end": { + "line": 2, + "col": 9 + }, + "stale": false + }, { "file": "$TESTCASE_ROOT/main.ml", "start": { diff --git a/tests/test-dirs/occurrences/project-wide/prefix.t/run.t b/tests/test-dirs/occurrences/project-wide/prefix.t/run.t index cfe3799a4..be353f2bc 100644 --- a/tests/test-dirs/occurrences/project-wide/prefix.t/run.t +++ b/tests/test-dirs/occurrences/project-wide/prefix.t/run.t @@ -98,6 +98,18 @@ Merlin successfully finds occurrences outside file when UNIT_NAME directive is u }, "stale": false }, + { + "file": "$TESTCASE_ROOT/b.ml", + "start": { + "line": 2, + "col": 8 + }, + "end": { + "line": 2, + "col": 9 + }, + "stale": false + }, { "file": "$TESTCASE_ROOT/a.ml", "start": { @@ -161,6 +173,18 @@ Merlin successfully finds occurrences outside file when WRAPPING_PREFIX directiv }, "stale": false }, + { + "file": "$TESTCASE_ROOT/b.ml", + "start": { + "line": 2, + "col": 8 + }, + "end": { + "line": 2, + "col": 9 + }, + "stale": false + }, { "file": "$TESTCASE_ROOT/a.ml", "start": { diff --git a/tests/test-dirs/occurrences/project-wide/pwo-basic.t b/tests/test-dirs/occurrences/project-wide/pwo-basic.t index 46a9515a6..849ef8984 100644 --- a/tests/test-dirs/occurrences/project-wide/pwo-basic.t +++ b/tests/test-dirs/occurrences/project-wide/pwo-basic.t @@ -28,6 +28,18 @@ { "class": "return", "value": [ + { + "file": "$TESTCASE_ROOT/main.ml", + "start": { + "line": 1, + "col": 26 + }, + "end": { + "line": 1, + "col": 29 + }, + "stale": false + }, { "file": "$TESTCASE_ROOT/lib.ml", "start": { diff --git a/tests/test-dirs/occurrences/project-wide/pwo-ml-gen.t b/tests/test-dirs/occurrences/project-wide/pwo-ml-gen.t index b1c7a824e..2b81b7709 100644 --- a/tests/test-dirs/occurrences/project-wide/pwo-ml-gen.t +++ b/tests/test-dirs/occurrences/project-wide/pwo-ml-gen.t @@ -57,6 +57,6 @@ We should not index generated modules (lib.ml-gen) $ $MERLIN single occurrences -scope project -identifier-at 3:23 \ > -filename main.ml -filename main.ml < main.ml | jq .value [ { - "file": "$TESTCASE_ROOT/lib.ml", + "file": "$TESTCASE_ROOT/main.ml", "start": { - "line": 2, - "col": 4 + "line": 1, + "col": 26 }, "end": { - "line": 2, - "col": 7 + "line": 1, + "col": 29 }, - "stale": true + "stale": false }, { - "file": "$TESTCASE_ROOT/main.ml", + "file": "$TESTCASE_ROOT/lib.ml", "start": { - "line": 1, - "col": 22 + "line": 2, + "col": 4 }, "end": { - "line": 1, - "col": 29 + "line": 2, + "col": 7 }, - "stale": false + "stale": true } ]