1010"""
1111# TODO: More consistent terminology, e.g. path/fnam, module/id, state/file
1212
13+ import ast
1314import binascii
1415import collections
1516import contextlib
1617from distutils .sysconfig import get_python_lib
18+ import functools
1719import gc
1820import hashlib
1921import json
2022import os .path
2123import re
2224import site
2325import stat
26+ import subprocess
2427import sys
2528import time
2629from os .path import dirname , basename
3336if MYPY :
3437 from typing import Deque
3538
39+ from mypy import sitepkgs
3640from mypy .nodes import (MODULE_REF , MypyFile , Node , ImportBase , Import , ImportFrom , ImportAll )
3741from mypy .semanal_pass1 import SemanticAnalyzerPass1
3842from mypy .semanal import SemanticAnalyzerPass2 , apply_semantic_analyzer_patches
@@ -698,7 +702,8 @@ def correct_rel_imp(imp: Union[ImportFrom, ImportAll]) -> str:
698702
def is_module(self, id: str) -> bool:
    """Is there a file in the file system corresponding to module id?

    The lookup is delegated to self.find_module_cache.find_module, which is
    given this object's lib_path and options.python_executable.
    """
    # FindModuleCache.find_module is wrapped in functools.lru_cache, so every
    # argument has to be hashable. tuple() leaves an existing tuple untouched
    # and makes a list-valued lib_path safe to pass.
    lib_path = tuple(self.lib_path)
    python_executable = self.options.python_executable
    return self.find_module_cache.find_module(id, lib_path, python_executable) is not None
702707
703708 def parse_file (self , id : str , path : str , source : str , ignore_errors : bool ) -> MypyFile :
704709 """Parse the source of a file with the given name.
@@ -789,6 +794,24 @@ def remove_cwd_prefix_from_path(fscache: FileSystemCache, p: str) -> str:
789794 return p
790795
791796
@functools.lru_cache(maxsize=None)
def _get_site_packages_dirs(python_executable: Optional[str]) -> List[str]:
    """Find package directories for given python.

    This runs a subprocess call, which generates a list of the site package
    directories. To avoid repeatedly calling a subprocess (which can be slow!)
    we lru_cache the results.

    Returns an empty list when python_executable is None, the value of
    sitepkgs.getsitepackages() when it equals sys.executable, and otherwise
    the Python literal printed by running sitepkgs.__file__ under that
    executable. subprocess.CalledProcessError propagates if the child exits
    with a non-zero status.

    NOTE: because of lru_cache every caller receives the same list object for
    a given executable, so the result must be treated as read-only.
    """
    if python_executable is None:
        return []
    if python_executable == sys.executable:
        # Use running Python's package dirs
        return sitepkgs.getsitepackages()
    # Use subprocess to get the package directory of given Python executable
    output = subprocess.check_output([python_executable, sitepkgs.__file__],
                                     stderr=subprocess.PIPE)
    return ast.literal_eval(output.decode())
813+
814+
792815class FindModuleCache :
793816 """Module finder with integrated cache.
794817
@@ -802,20 +825,29 @@ class FindModuleCache:
802825
def __init__(self, fscache: Optional[FileSystemMetaCache] = None) -> None:
    """Set up the file system cache and the memoized lookup entry points.

    fscache is the file system cache to consult; a fresh FileSystemMetaCache
    is created when None is passed.
    """
    # Test against None explicitly so a caller-supplied cache object is never
    # discarded just because it happens to evaluate as false.
    self.fscache = fscache if fscache is not None else FileSystemMetaCache()
    # Wrap the bound methods (rather than decorating them on the class) so the
    # caches belong to this instance. Arguments passed to the wrappers must be
    # hashable, e.g. lib_path as a tuple.
    self.find_lib_path_dirs = functools.lru_cache(maxsize=None)(self._find_lib_path_dirs)
    self.find_module = functools.lru_cache(maxsize=None)(self._find_module)
830+
def clear(self) -> None:
    """Drop every memoized find_module and find_lib_path_dirs result."""
    self.find_module.cache_clear()
    self.find_lib_path_dirs.cache_clear()
807834
def _find_lib_path_dirs(self, dir_chain: str, lib_path: Tuple[str, ...]) -> List[str]:
    """Return the directories '<lib_path entry>/<dir_chain>' that exist.

    This is repeated work within distinct find_module calls: finding which
    elements of lib_path have even the subdirectory they'd need for the module
    to exist. The answer is shared among different module ids when they differ
    only in the last component, which is why __init__ exposes this method
    through an lru_cache wrapper named find_lib_path_dirs.

    Existence is checked with self.fscache.isdir; the order of lib_path is
    preserved in the result.
    """
    # e.g., '/usr/lib/python3.4/foo/bar'
    candidates = (os.path.normpath(os.path.join(pathitem, dir_chain))
                  for pathitem in lib_path)
    return [candidate for candidate in candidates if self.fscache.isdir(candidate)]
847+
848+ def _find_module (self , id : str , lib_path : Tuple [str , ...],
849+ python_executable : Optional [str ]) -> Optional [str ]:
850+ """Return the path of the module source file, or None if not found."""
819851 fscache = self .fscache
820852
821853 # If we're looking for a module like 'foo.bar.baz', it's likely that most of the
@@ -824,15 +856,23 @@ def _find_module(self, id: str, lib_path: Tuple[str, ...]) -> Optional[str]:
824856 # that will require the same subdirectory.
825857 components = id .split ('.' )
826858 dir_chain = os .sep .join (components [:- 1 ]) # e.g., 'foo/bar'
827- if (dir_chain , lib_path ) not in self .dirs :
828- dirs = []
829- for pathitem in lib_path :
830- # e.g., '/usr/lib/python3.4/foo/bar'
831- dir = os .path .normpath (os .path .join (pathitem , dir_chain ))
832- if fscache .isdir (dir ):
833- dirs .append (dir )
834- self .dirs [dir_chain , lib_path ] = dirs
835- candidate_base_dirs = self .dirs [dir_chain , lib_path ]
859+ # TODO (ethanhs): refactor each path search to its own method with lru_cache
860+
861+ third_party_dirs = []
862+ # Third-party stub/typed packages
863+ for pkg_dir in _get_site_packages_dirs (python_executable ):
864+ stub_name = components [0 ] + '-stubs'
865+ typed_file = os .path .join (pkg_dir , components [0 ], 'py.typed' )
866+ stub_dir = os .path .join (pkg_dir , stub_name )
867+ if fscache .isdir (stub_dir ):
868+ stub_components = [stub_name ] + components [1 :]
869+ path = os .path .join (pkg_dir , * stub_components [:- 1 ])
870+ if fscache .isdir (path ):
871+ third_party_dirs .append (path )
872+ elif fscache .isfile (typed_file ):
873+ path = os .path .join (pkg_dir , dir_chain )
874+ third_party_dirs .append (path )
875+ candidate_base_dirs = self .find_lib_path_dirs (dir_chain , lib_path ) + third_party_dirs
836876
837877 # If we're looking for a module like 'foo.bar.baz', then candidate_base_dirs now
838878 # contains just the subdirectories 'foo/bar' that actually exist under the
@@ -845,26 +885,21 @@ def _find_module(self, id: str, lib_path: Tuple[str, ...]) -> Optional[str]:
845885 # Prefer package over module, i.e. baz/__init__.py* over baz.py*.
846886 for extension in PYTHON_EXTENSIONS :
847887 path = base_path + sepinit + extension
888+ path_stubs = base_path + '-stubs' + sepinit + extension
848889 if fscache .isfile_case (path ) and verify_module (fscache , id , path ):
849890 return path
891+ elif fscache .isfile_case (path_stubs ) and verify_module (fscache , id , path_stubs ):
892+ return path_stubs
850893 # No package, look for module.
851894 for extension in PYTHON_EXTENSIONS :
852895 path = base_path + extension
853896 if fscache .isfile_case (path ) and verify_module (fscache , id , path ):
854897 return path
855898 return None
856899
857- def find_module (self , id : str , lib_path_arg : Iterable [str ]) -> Optional [str ]:
858- """Return the path of the module source file, or None if not found."""
859- lib_path = tuple (lib_path_arg )
860-
861- key = (id , lib_path )
862- if key not in self .results :
863- self .results [key ] = self ._find_module (id , lib_path )
864- return self .results [key ]
865-
866- def find_modules_recursive (self , module : str , lib_path : List [str ]) -> List [BuildSource ]:
867- module_path = self .find_module (module , lib_path )
900+ def find_modules_recursive (self , module : str , lib_path : Tuple [str , ...],
901+ python_executable : Optional [str ]) -> List [BuildSource ]:
902+ module_path = self .find_module (module , lib_path , python_executable )
868903 if not module_path :
869904 return []
870905 result = [BuildSource (module_path , module , None )]
@@ -884,13 +919,15 @@ def find_modules_recursive(self, module: str, lib_path: List[str]) -> List[Build
884919 (os .path .isfile (os .path .join (abs_path , '__init__.py' )) or
885920 os .path .isfile (os .path .join (abs_path , '__init__.pyi' ))):
886921 hits .add (item )
887- result += self .find_modules_recursive (module + '.' + item , lib_path )
922+ result += self .find_modules_recursive (module + '.' + item , lib_path ,
923+ python_executable )
888924 elif item != '__init__.py' and item != '__init__.pyi' and \
889925 item .endswith (('.py' , '.pyi' )):
890926 mod = item .split ('.' )[0 ]
891927 if mod not in hits :
892928 hits .add (mod )
893- result += self .find_modules_recursive (module + '.' + mod , lib_path )
929+ result += self .find_modules_recursive (module + '.' + mod , lib_path ,
930+ python_executable )
894931 return result
895932
896933
@@ -2001,7 +2038,8 @@ def find_module_and_diagnose(manager: BuildManager,
20012038 # difference and just assume 'builtins' everywhere,
20022039 # which simplifies code.
20032040 file_id = '__builtin__'
2004- path = manager .find_module_cache .find_module (file_id , manager .lib_path )
2041+ path = manager .find_module_cache .find_module (file_id , manager .lib_path ,
2042+ manager .options .python_executable )
20052043 if path :
20062044 # For non-stubs, look at options.follow_imports:
20072045 # - normal (default) -> fully analyze
@@ -2125,12 +2163,14 @@ def dispatch(sources: List[BuildSource], manager: BuildManager) -> Graph:
21252163 graph = load_graph (sources , manager )
21262164
21272165 t1 = time .time ()
2166+ fm_cache_size = manager .find_module_cache .find_module .cache_info ().currsize
2167+ fm_dir_cache_size = manager .find_module_cache .find_lib_path_dirs .cache_info ().currsize
21282168 manager .add_stats (graph_size = len (graph ),
21292169 stubs_found = sum (g .path is not None and g .path .endswith ('.pyi' )
21302170 for g in graph .values ()),
21312171 graph_load_time = (t1 - t0 ),
2132- fm_cache_size = len ( manager . find_module_cache . results ) ,
2133- fm_dir_cache_size = len ( manager . find_module_cache . dirs ) ,
2172+ fm_cache_size = fm_cache_size ,
2173+ fm_dir_cache_size = fm_dir_cache_size ,
21342174 )
21352175 if not graph :
21362176 print ("Nothing to do?!" )
0 commit comments