11//! Helpers to gather the VCS information for `cargo package`. 
2+ 
23use  crate :: core:: { Package ,  Workspace } ; 
34use  crate :: ops:: PackageOpts ; 
45use  crate :: sources:: PathEntry ; 
@@ -7,11 +8,11 @@ use anyhow::Context;
78use  cargo_util:: paths; 
89use  gix:: bstr:: ByteSlice ; 
910use  gix:: dir:: walk:: EmissionMode ; 
11+ use  gix:: dirwalk:: Options ; 
1012use  gix:: index:: entry:: Mode ; 
1113use  gix:: status:: tree_index:: TrackRenames ; 
1214use  gix:: worktree:: stack:: state:: ignore:: Source ; 
1315use  serde:: Serialize ; 
14- use  std:: collections:: HashSet ; 
1516use  std:: path:: { Path ,  PathBuf } ; 
1617use  tracing:: debug; 
1718
@@ -47,7 +48,7 @@ pub fn check_repo_state(
4748    opts :  & PackageOpts < ' _ > , 
4849)  -> CargoResult < Option < VcsInfo > >  { 
4950    let  gctx = ws. gctx ( ) ; 
50-     let  Ok ( repo)  = gix:: discover ( p. root ( ) )  else  { 
51+     let  Ok ( mut   repo)  = gix:: discover ( p. root ( ) )  else  { 
5152        gctx. shell ( ) . verbose ( |shell| { 
5253            shell. warn ( format_args ! ( 
5354                "no (git) VCS found for `{}`" , 
@@ -115,7 +116,7 @@ pub fn check_repo_state(
115116        path. display( ) , 
116117        workdir. display( ) , 
117118    ) ; 
118-     let  Some ( git)  = git ( ws,  p,  src_files,  & repo,  & opts) ? else  { 
119+     let  Some ( git)  = git ( ws,  p,  src_files,  & mut   repo,  & opts) ? else  { 
119120        // If the git repo lacks an essential field like `sha1`, which has existed from the beginning,
120121        // don't generate the corresponding file, in order to stay consistent with past behavior.
121122        return  Ok ( None ) ; 
@@ -181,31 +182,32 @@ fn git(
181182    ws :  & Workspace < ' _ > , 
182183    pkg :  & Package , 
183184    src_files :  & [ PathEntry ] , 
184-     repo :  & gix:: Repository , 
185+     repo :  & mut   gix:: Repository , 
185186    opts :  & PackageOpts < ' _ > , 
186187)  -> CargoResult < Option < GitVcsInfo > >  { 
188+     { 
189+         let  mut  config = repo. config_snapshot_mut ( ) ; 
190+         // Turn off index threading (`Index::THREADS`): it currently is only a very minor
191+         // speedup for the biggest repositories, but might trigger the creation of many threads.
192+         config. set_value ( & gix:: config:: tree:: Index :: THREADS ,  "false" ) ?; 
193+     } 
187194    // This is a collection of any dirty or untracked files. This covers: 
188195    // - new/modified/deleted/renamed/type change (index or worktree) 
189196    // - untracked files (which are "new" worktree files) 
190197    // - ignored (in case the user has an `include` directive that 
191198    //   conflicts with .gitignore). 
192-     let  ( mut  dirty_files,   mut  dirty_files_outside_package_root )  =  ( Vec :: new ( ) ,   Vec :: new ( ) ) ; 
199+     let  mut  dirty_files =  Vec :: new ( ) ; 
193200    let  workdir = repo. workdir ( ) . unwrap ( ) ; 
194201    collect_statuses ( 
195202        repo, 
196203        workdir, 
197204        relative_package_root ( repo,  pkg. root ( ) ) . as_deref ( ) , 
198205        & mut  dirty_files, 
199-         & mut  dirty_files_outside_package_root, 
200206    ) ?; 
201207
202208    // Include each submodule so that the error message can provide 
203209    // specifically *which* files in a submodule are modified. 
204-     status_submodules ( 
205-         repo, 
206-         & mut  dirty_files, 
207-         & mut  dirty_files_outside_package_root, 
208-     ) ?; 
210+     status_submodules ( repo,  & mut  dirty_files) ?; 
209211
210212    // Find the intersection of dirty in git, and the src_files that would 
211213    // be packaged. This is a lazy n^2 check, but seems fine with 
@@ -230,10 +232,7 @@ fn git(
230232            } 
231233        } ) 
232234        . map ( |p| p. as_ref ( ) ) 
233-         . chain ( 
234-             dirty_files_outside_pkg_root ( ws,  pkg,  & dirty_files_outside_package_root,  src_files) ?
235-                 . iter ( ) , 
236-         ) 
235+         . chain ( dirty_files_outside_pkg_root_orig ( ws,  pkg,  repo,  src_files) ?. iter ( ) ) 
237236        . map ( |path| { 
238237            pathdiff:: diff_paths ( path,  cwd) 
239238                . as_ref ( ) 
@@ -271,25 +270,17 @@ fn collect_statuses(
271270    workdir :  & Path , 
272271    relative_package_root :  Option < & Path > , 
273272    dirty_files :  & mut  Vec < PathBuf > , 
274-     dirty_files_outside_package_root :  & mut  Vec < PathBuf > , 
275273)  -> CargoResult < ( ) >  { 
276274    let  statuses = repo
277275        . status ( gix:: progress:: Discard ) ?
278-         . dirwalk_options ( |opts| { 
279-             opts. emit_untracked ( gix:: dir:: walk:: EmissionMode :: Matching ) 
280-                 // Also pick up ignored files or whole directories 
281-                 // to specifically catch overzealously ignored source files. 
282-                 // Later we will match these dirs by prefix, which is why collapsing 
283-                 // them is desirable here. 
284-                 . emit_ignored ( Some ( EmissionMode :: CollapseDirectory ) ) 
285-                 . emit_tracked ( false ) 
286-                 . recurse_repositories ( false ) 
287-                 . symlinks_to_directories_are_ignored_like_directories ( true ) 
288-                 . emit_empty_directories ( false ) 
289-         } ) 
276+         . dirwalk_options ( configure_dirwalk) 
290277        . tree_index_track_renames ( TrackRenames :: Disabled ) 
291278        . index_worktree_submodules ( None ) 
292-         . into_iter ( None  /* pathspec patterns */ ) 
279+         . into_iter ( 
280+             relative_package_root. map ( |rela_pkg_root| { 
281+                 gix:: path:: into_bstr ( rela_pkg_root) . into_owned ( ) 
282+             } ) ,  /* pathspec patterns */ 
283+         ) 
293284        . with_context ( || { 
294285            format ! ( 
295286                "failed to begin git status for repo {}" , 
@@ -307,11 +298,6 @@ fn collect_statuses(
307298
308299        let  rel_path = gix:: path:: from_bstr ( status. location ( ) ) ; 
309300        let  path = workdir. join ( & rel_path) ; 
310-         if  relative_package_root. is_some_and ( |pkg_root| !rel_path. starts_with ( pkg_root) )  { 
311-             dirty_files_outside_package_root. push ( path) ; 
312-             continue ; 
313-         } 
314- 
315301        // It is OK to include Cargo.lock even if it is ignored. 
316302        if  path. ends_with ( "Cargo.lock" ) 
317303            && matches ! ( 
@@ -330,11 +316,7 @@ fn collect_statuses(
330316} 
331317
332318/// Helper to collect dirty statuses while recursing into submodules. 
333- fn  status_submodules ( 
334-     repo :  & gix:: Repository , 
335-     dirty_files :  & mut  Vec < PathBuf > , 
336-     dirty_files_outside_package_root :  & mut  Vec < PathBuf > , 
337- )  -> CargoResult < ( ) >  { 
319+ fn  status_submodules ( repo :  & gix:: Repository ,  dirty_files :  & mut  Vec < PathBuf > )  -> CargoResult < ( ) >  { 
338320    let  Some ( submodules)  = repo. submodules ( ) ? else  { 
339321        return  Ok ( ( ) ) ; 
340322    } ; 
@@ -345,14 +327,8 @@ fn status_submodules(
345327            let  Some ( workdir)  = sub_repo. workdir ( )  else  { 
346328                continue ; 
347329            } ; 
348-             status_submodules ( & sub_repo,  dirty_files,  dirty_files_outside_package_root) ?; 
349-             collect_statuses ( 
350-                 & sub_repo, 
351-                 workdir, 
352-                 None , 
353-                 dirty_files, 
354-                 dirty_files_outside_package_root, 
355-             ) ?; 
330+             status_submodules ( & sub_repo,  dirty_files) ?; 
331+             collect_statuses ( & sub_repo,  workdir,  None ,  dirty_files) ?; 
356332        } 
357333    } 
358334    Ok ( ( ) ) 
@@ -374,27 +350,29 @@ fn relative_package_root(repo: &gix::Repository, pkg_root: &Path) -> Option<Path
374350/// This currently looks at 
375351/// 
376352/// * `package.readme` and `package.license-file` pointing to paths outside package root 
377- /// * symlinks targets reside  outside package root 
353+ /// * symlink targets residing outside package root
378354/// * Any change in the root workspace manifest, regardless of what has changed. 
379355/// 
380356/// This is required because those paths may link to a file outside the 
381357/// current package root, but still under the git workdir, affecting the 
382358/// final packaged `.crate` file. 
383- fn  dirty_files_outside_pkg_root ( 
359+ fn  dirty_files_outside_pkg_root_orig ( 
384360    ws :  & Workspace < ' _ > , 
385361    pkg :  & Package , 
386-     dirty_files_outside_of_package_root :  & [ PathBuf ] , 
362+     repo :  & gix :: Repository , 
387363    src_files :  & [ PathEntry ] , 
388- )  -> CargoResult < HashSet < PathBuf > >  { 
364+ )  -> CargoResult < Vec < PathBuf > >  { 
389365    let  pkg_root = pkg. root ( ) ; 
366+     let  workdir = repo. workdir ( ) . unwrap ( ) ; 
367+ 
390368    let  meta = pkg. manifest ( ) . metadata ( ) ; 
391369    let  metadata_paths:  Vec < _ >  = [ & meta. license_file ,  & meta. readme ] 
392370        . into_iter ( ) 
393371        . filter_map ( |p| p. as_deref ( ) ) 
394372        . map ( |path| paths:: normalize_path ( & pkg_root. join ( path) ) ) 
395373        . collect ( ) ; 
396374
397-     let  dirty_files  = src_files
375+     let  linked_files_outside_package_root :   Vec < _ >  = src_files
398376        . iter ( ) 
399377        . filter ( |p| p. is_symlink_or_under_symlink ( ) ) 
400378        . map ( |p| p. as_ref ( ) . as_path ( ) ) 
@@ -403,19 +381,58 @@ fn dirty_files_outside_pkg_root(
403381        // If inside package root. Don't bother checking git status. 
404382        . filter ( |p| paths:: strip_prefix_canonical ( p,  pkg_root) . is_err ( ) ) 
405383        // Handle files outside package root but under git workdir, 
406-         . filter_map ( |src_file| { 
407-             let  canon_src_path = gix:: path:: realpath_opts ( 
408-                 src_file, 
409-                 ws. gctx ( ) . cwd ( ) , 
410-                 gix:: path:: realpath:: MAX_SYMLINKS , 
384+         . filter_map ( |p| paths:: strip_prefix_canonical ( p,  workdir) . ok ( ) ) 
385+         . collect ( ) ; 
386+ 
387+     if  linked_files_outside_package_root. is_empty ( )  { 
388+         return  Ok ( Vec :: new ( ) ) ; 
389+     } 
390+ 
391+     let  statuses = repo
392+         . status ( gix:: progress:: Discard ) ?
393+         . dirwalk_options ( configure_dirwalk) 
394+         // Limit the number of threads used for the worktree status: the pathspec will
395+         // prevent most paths from being visited anyway, so there is not much work.
396+         . index_worktree_options_mut ( |opts| opts. thread_limit  = Some ( 1 ) ) 
397+         . tree_index_track_renames ( TrackRenames :: Disabled ) 
398+         . index_worktree_submodules ( None ) 
399+         . into_iter ( 
400+             linked_files_outside_package_root
401+                 . into_iter ( ) 
402+                 . map ( |p| gix:: path:: into_bstr ( p) . into_owned ( ) ) , 
403+         ) 
404+         . with_context ( || { 
405+             format ! ( 
406+                 "failed to begin git status for outfor repo {}" , 
407+                 repo. path( ) . display( ) 
411408            ) 
412-              . unwrap_or_else ( |_| src_file . to_owned ( ) ) ; 
409+         } ) ? ; 
413410
414-             dirty_files_outside_of_package_root
415-                 . iter ( ) 
416-                 . any ( |p| canon_src_path. starts_with ( p) ) 
417-                 . then_some ( canon_src_path) 
418-         } ) 
419-         . collect ( ) ; 
411+     let  mut  dirty_files = Vec :: new ( ) ; 
412+     for  status in  statuses { 
413+         let  status = status. with_context ( || { 
414+             format ! ( 
415+                 "failed to retrieve git status from repo {}" , 
416+                 repo. path( ) . display( ) 
417+             ) 
418+         } ) ?; 
419+ 
420+         let  rel_path = gix:: path:: from_bstr ( status. location ( ) ) ; 
421+         let  path = workdir. join ( & rel_path) ; 
422+         dirty_files. push ( path) ; 
423+     } 
420424    Ok ( dirty_files) 
421425} 
426+ 
/// Applies the directory-walk settings shared by the git status queries in this file
/// (it is passed to `dirwalk_options` both when collecting statuses and when checking
/// linked files outside the package root).
///
/// Takes the dirwalk `Options` by value and returns them with these settings applied:
/// untracked entries are emitted, ignored entries are emitted with whole directories
/// collapsed, tracked entries are not emitted, nested repositories are not recursed
/// into, and empty directories are not emitted.
427+ fn  configure_dirwalk ( opts :  Options )  -> Options  { 
428+     opts. emit_untracked ( gix:: dir:: walk:: EmissionMode :: Matching ) 
429+         // Also pick up ignored files or whole directories 
430+         // to specifically catch overzealously ignored source files. 
431+         // Later we will match these dirs by prefix, which is why collapsing 
432+         // them is desirable here. 
433+         . emit_ignored ( Some ( EmissionMode :: CollapseDirectory ) ) 
434+         . emit_tracked ( false ) 
435+         . recurse_repositories ( false ) 
436+         . symlinks_to_directories_are_ignored_like_directories ( true ) 
437+         . emit_empty_directories ( false ) 
438+ } 
0 commit comments