10
10
"""
11
11
# TODO: More consistent terminology, e.g. path/fnam, module/id, state/file
12
12
13
+ import ast
13
14
import binascii
14
15
import collections
15
16
import contextlib
16
17
from distutils .sysconfig import get_python_lib
18
+ import functools
17
19
import gc
18
20
import hashlib
19
21
import json
20
22
import os .path
21
23
import re
22
24
import site
23
25
import stat
26
+ import subprocess
24
27
import sys
25
28
import time
26
29
from os .path import dirname , basename
33
36
if MYPY :
34
37
from typing import Deque
35
38
39
+ from mypy import sitepkgs
36
40
from mypy .nodes import (MODULE_REF , MypyFile , Node , ImportBase , Import , ImportFrom , ImportAll )
37
41
from mypy .semanal_pass1 import SemanticAnalyzerPass1
38
42
from mypy .semanal import SemanticAnalyzerPass2 , apply_semantic_analyzer_patches
@@ -698,7 +702,8 @@ def correct_rel_imp(imp: Union[ImportFrom, ImportAll]) -> str:
698
702
699
703
def is_module(self, id: str) -> bool:
    """Is there a file in the file system corresponding to module id?

    Args:
        id: Module name to look up, e.g. 'foo.bar'.

    Returns:
        True if find_module() yields a path for the module when searching
        this object's lib_path with options.python_executable, else False.
    """
    # find_module is an lru_cache wrapper (assuming find_module_cache is a
    # FindModuleCache), so every argument must be hashable. Nothing on the
    # lookup path coerces lib_path any more, so do it here; tuple() of a
    # tuple is a no-op, while a list would otherwise raise TypeError.
    lib_path = tuple(self.lib_path)
    path = self.find_module_cache.find_module(id, lib_path,
                                              self.options.python_executable)
    return path is not None
702
707
703
708
def parse_file (self , id : str , path : str , source : str , ignore_errors : bool ) -> MypyFile :
704
709
"""Parse the source of a file with the given name.
@@ -789,6 +794,24 @@ def remove_cwd_prefix_from_path(fscache: FileSystemCache, p: str) -> str:
789
794
return p
790
795
791
796
797
@functools.lru_cache(maxsize=None)
def _get_site_packages_dirs(python_executable: Optional[str]) -> List[str]:
    """Return the site-packages directories for the given Python executable.

    Results are memoized per executable with lru_cache, because querying a
    foreign interpreter means spawning a subprocess, which is slow.

    Args:
        python_executable: Path of the interpreter to query, or None.

    Returns:
        An empty list when no executable is given; otherwise the list of
        directories reported by the sitepkgs module, obtained either
        in-process or from the output of running it under that interpreter.
    """
    if python_executable is None:
        return []
    if python_executable == sys.executable:
        # Same interpreter as the one running us: ask sitepkgs directly.
        return sitepkgs.getsitepackages()
    # A different interpreter: run the sitepkgs script with it and parse the
    # Python literal it prints on stdout.
    output = subprocess.check_output([python_executable, sitepkgs.__file__],
                                     stderr=subprocess.PIPE)
    return ast.literal_eval(output.decode())
813
+
814
+
792
815
class FindModuleCache :
793
816
"""Module finder with integrated cache.
794
817
@@ -802,20 +825,29 @@ class FindModuleCache:
802
825
803
826
def __init__(self, fscache: Optional[FileSystemMetaCache] = None) -> None:
    """Set up the file system cache and the memoized lookup entry points.

    Args:
        fscache: File system metadata cache to use; a fresh
            FileSystemMetaCache is created when none (or a falsy one) is
            given.
    """
    self.fscache = fscache if fscache else FileSystemMetaCache()
    # Wrap the bound methods so that each instance owns its own unbounded
    # caches, keyed on the call arguments.
    memoize = functools.lru_cache(maxsize=None)
    self.find_lib_path_dirs = memoize(self._find_lib_path_dirs)
    self.find_module = memoize(self._find_module)
830
+
831
def clear(self) -> None:
    """Discard every memoized result held by the two lookup caches."""
    module_cache = self.find_module
    module_cache.cache_clear()
    dirs_cache = self.find_lib_path_dirs
    dirs_cache.cache_clear()
807
834
835
def _find_lib_path_dirs(self, dir_chain: str, lib_path: Tuple[str, ...]) -> List[str]:
    """Return the existing '<lib_path entry>/<dir_chain>' directories.

    Module ids that differ only in their last component need the same
    parent directory, so this part of the search is split out where it can
    be shared between find_module calls.

    Args:
        dir_chain: Relative directory the module's parent package would
            occupy, e.g. 'foo/bar' (may be empty).
        lib_path: Search roots, in priority order.

    Returns:
        The normalized candidate directories that fscache reports as
        existing, in lib_path order.
    """
    # e.g., '/usr/lib/python3.4/foo/bar'
    candidates = (os.path.normpath(os.path.join(root, dir_chain))
                  for root in lib_path)
    return [candidate for candidate in candidates
            if self.fscache.isdir(candidate)]
847
+
848
+ def _find_module (self , id : str , lib_path : Tuple [str , ...],
849
+ python_executable : Optional [str ]) -> Optional [str ]:
850
+ """Return the path of the module source file, or None if not found."""
819
851
fscache = self .fscache
820
852
821
853
# If we're looking for a module like 'foo.bar.baz', it's likely that most of the
@@ -824,15 +856,23 @@ def _find_module(self, id: str, lib_path: Tuple[str, ...]) -> Optional[str]:
824
856
# that will require the same subdirectory.
825
857
components = id .split ('.' )
826
858
dir_chain = os .sep .join (components [:- 1 ]) # e.g., 'foo/bar'
827
- if (dir_chain , lib_path ) not in self .dirs :
828
- dirs = []
829
- for pathitem in lib_path :
830
- # e.g., '/usr/lib/python3.4/foo/bar'
831
- dir = os .path .normpath (os .path .join (pathitem , dir_chain ))
832
- if fscache .isdir (dir ):
833
- dirs .append (dir )
834
- self .dirs [dir_chain , lib_path ] = dirs
835
- candidate_base_dirs = self .dirs [dir_chain , lib_path ]
859
+ # TODO (ethanhs): refactor each path search to its own method with lru_cache
860
+
861
+ third_party_dirs = []
862
+ # Third-party stub/typed packages
863
+ for pkg_dir in _get_site_packages_dirs (python_executable ):
864
+ stub_name = components [0 ] + '-stubs'
865
+ typed_file = os .path .join (pkg_dir , components [0 ], 'py.typed' )
866
+ stub_dir = os .path .join (pkg_dir , stub_name )
867
+ if fscache .isdir (stub_dir ):
868
+ stub_components = [stub_name ] + components [1 :]
869
+ path = os .path .join (pkg_dir , * stub_components [:- 1 ])
870
+ if fscache .isdir (path ):
871
+ third_party_dirs .append (path )
872
+ elif fscache .isfile (typed_file ):
873
+ path = os .path .join (pkg_dir , dir_chain )
874
+ third_party_dirs .append (path )
875
+ candidate_base_dirs = self .find_lib_path_dirs (dir_chain , lib_path ) + third_party_dirs
836
876
837
877
# If we're looking for a module like 'foo.bar.baz', then candidate_base_dirs now
838
878
# contains just the subdirectories 'foo/bar' that actually exist under the
@@ -845,26 +885,21 @@ def _find_module(self, id: str, lib_path: Tuple[str, ...]) -> Optional[str]:
845
885
# Prefer package over module, i.e. baz/__init__.py* over baz.py*.
846
886
for extension in PYTHON_EXTENSIONS :
847
887
path = base_path + sepinit + extension
888
+ path_stubs = base_path + '-stubs' + sepinit + extension
848
889
if fscache .isfile_case (path ) and verify_module (fscache , id , path ):
849
890
return path
891
+ elif fscache .isfile_case (path_stubs ) and verify_module (fscache , id , path_stubs ):
892
+ return path_stubs
850
893
# No package, look for module.
851
894
for extension in PYTHON_EXTENSIONS :
852
895
path = base_path + extension
853
896
if fscache .isfile_case (path ) and verify_module (fscache , id , path ):
854
897
return path
855
898
return None
856
899
857
- def find_module (self , id : str , lib_path_arg : Iterable [str ]) -> Optional [str ]:
858
- """Return the path of the module source file, or None if not found."""
859
- lib_path = tuple (lib_path_arg )
860
-
861
- key = (id , lib_path )
862
- if key not in self .results :
863
- self .results [key ] = self ._find_module (id , lib_path )
864
- return self .results [key ]
865
-
866
- def find_modules_recursive (self , module : str , lib_path : List [str ]) -> List [BuildSource ]:
867
- module_path = self .find_module (module , lib_path )
900
+ def find_modules_recursive (self , module : str , lib_path : Tuple [str , ...],
901
+ python_executable : Optional [str ]) -> List [BuildSource ]:
902
+ module_path = self .find_module (module , lib_path , python_executable )
868
903
if not module_path :
869
904
return []
870
905
result = [BuildSource (module_path , module , None )]
@@ -884,13 +919,15 @@ def find_modules_recursive(self, module: str, lib_path: List[str]) -> List[Build
884
919
(os .path .isfile (os .path .join (abs_path , '__init__.py' )) or
885
920
os .path .isfile (os .path .join (abs_path , '__init__.pyi' ))):
886
921
hits .add (item )
887
- result += self .find_modules_recursive (module + '.' + item , lib_path )
922
+ result += self .find_modules_recursive (module + '.' + item , lib_path ,
923
+ python_executable )
888
924
elif item != '__init__.py' and item != '__init__.pyi' and \
889
925
item .endswith (('.py' , '.pyi' )):
890
926
mod = item .split ('.' )[0 ]
891
927
if mod not in hits :
892
928
hits .add (mod )
893
- result += self .find_modules_recursive (module + '.' + mod , lib_path )
929
+ result += self .find_modules_recursive (module + '.' + mod , lib_path ,
930
+ python_executable )
894
931
return result
895
932
896
933
@@ -2001,7 +2038,8 @@ def find_module_and_diagnose(manager: BuildManager,
2001
2038
# difference and just assume 'builtins' everywhere,
2002
2039
# which simplifies code.
2003
2040
file_id = '__builtin__'
2004
- path = manager .find_module_cache .find_module (file_id , manager .lib_path )
2041
+ path = manager .find_module_cache .find_module (file_id , manager .lib_path ,
2042
+ manager .options .python_executable )
2005
2043
if path :
2006
2044
# For non-stubs, look at options.follow_imports:
2007
2045
# - normal (default) -> fully analyze
@@ -2125,12 +2163,14 @@ def dispatch(sources: List[BuildSource], manager: BuildManager) -> Graph:
2125
2163
graph = load_graph (sources , manager )
2126
2164
2127
2165
t1 = time .time ()
2166
+ fm_cache_size = manager .find_module_cache .find_module .cache_info ().currsize
2167
+ fm_dir_cache_size = manager .find_module_cache .find_lib_path_dirs .cache_info ().currsize
2128
2168
manager .add_stats (graph_size = len (graph ),
2129
2169
stubs_found = sum (g .path is not None and g .path .endswith ('.pyi' )
2130
2170
for g in graph .values ()),
2131
2171
graph_load_time = (t1 - t0 ),
2132
- fm_cache_size = len ( manager . find_module_cache . results ) ,
2133
- fm_dir_cache_size = len ( manager . find_module_cache . dirs ) ,
2172
+ fm_cache_size = fm_cache_size ,
2173
+ fm_dir_cache_size = fm_dir_cache_size ,
2134
2174
)
2135
2175
if not graph :
2136
2176
print ("Nothing to do?!" )
0 commit comments