From 3055df272898f95363f2862cf53df5c627c6bf93 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 12 Feb 2016 09:16:09 -0800 Subject: [PATCH 001/117] Dump/load infrastructure set up (data [de]serialization is bogus). --- .gitignore | 1 + mypy/build.py | 202 ++++++++++++++++++++++++++++++++++++++++-- mypy/nodes.py | 13 +++ mypy/serialization.py | 41 +++++++++ 4 files changed, 249 insertions(+), 8 deletions(-) create mode 100644 mypy/serialization.py diff --git a/.gitignore b/.gitignore index 800d9c80a8bb..68eedb55212a 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,7 @@ docs/build/ *.iml /out/ .venv/ +.mypy_cache/ # Packages *.egg diff --git a/mypy/build.py b/mypy/build.py index 6b2e1932ab3b..76cfa3bababe 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -9,6 +9,8 @@ The function build() is the main interface to this module. """ +import binascii +import json import os import os.path import shlex @@ -17,7 +19,7 @@ import re from os.path import dirname, basename -from typing import Dict, List, Tuple, Iterable, cast, Set, Union, Optional +from typing import Any, Dict, List, Tuple, Iterable, cast, Set, Union, Optional, NamedTuple from mypy.types import Type from mypy.nodes import MypyFile, Node, Import, ImportFrom, ImportAll @@ -30,6 +32,7 @@ from mypy.report import Reports from mypy import defaults from mypy import moduleinfo +from mypy import serialization from mypy import util @@ -73,7 +76,7 @@ PYTHON_EXTENSIONS = ['.pyi', '.py'] -final_state = TYPE_CHECKED_STATE +final_state = TYPE_CHECKED_STATE # XXX Should be FINAL_STATE def earlier_state(s: int, t: int) -> bool: @@ -325,6 +328,16 @@ def read_program(path: str, pyversion: Tuple[int, int]) -> str: return text +CacheMeta = NamedTuple('CacheMeta', + [('id', str), + ('path', str), + ('mtime', float), + ('size', int), + ('dependencies', List[str]), + ('data_mtime', float), + ]) + + class BuildManager: """This is the central class for building a mypy program. @@ -355,6 +368,7 @@ class BuildManager: Item (m, n) indicates whether m depends on n (directly or indirectly). missing_modules: Set of modules that could not be imported encountered so far + loading_cache: Cache for type-checked files """ def __init__(self, data_dir: str, @@ -388,6 +402,7 @@ def __init__(self, data_dir: str, self.module_files = {} # type: Dict[str, str] self.module_deps = {} # type: Dict[Tuple[str, str], bool] self.missing_modules = set() # type: Set[str] + self.loading_cache = {} # type: Dict[str, Optional[CacheMeta]] def process(self, initial_states: List['UnprocessedFile']) -> BuildResult: """Perform a build. @@ -397,6 +412,7 @@ def process(self, initial_states: List['UnprocessedFile']) -> BuildResult: manager object. The return values are identical to the return values of the build function. """ + # TODO: Try import_from_cache() too. self.states += initial_states for initial_state in initial_states: self.module_files[initial_state.id] = initial_state.path @@ -613,6 +629,7 @@ def remove_cwd_prefix_from_path(p: str) -> str: return p +# TODO: Use a NamedTuple? class StateInfo: """Description of a source file that is being built.""" @@ -781,6 +798,10 @@ def process(self) -> None: first = FirstPass(self.semantic_analyzer()) first.analyze(tree, self.path, self.id) + # Initialize module symbol table, which was populated by the semantic + # analyzer. + tree.names = self.semantic_analyzer().globals + # Add all directly imported modules to be processed (however they are # not processed yet, just waiting to be processed). 
for id, line in self.manager.all_imported_modules_in_file(tree): @@ -802,10 +823,6 @@ def process(self) -> None: self.module_not_found(self.path, line, id) self.manager.missing_modules.add(id) - # Initialize module symbol table, which was populated by the semantic - # analyzer. - tree.names = self.semantic_analyzer().globals - # Replace this state object with a parsed state in BuildManager. self.switch_state(ParsedFile(self.info(), tree)) @@ -819,6 +836,9 @@ def import_module(self, id: str) -> bool: # Do nothing: already being compiled. return True + if import_from_cache(id, self.manager): + return True + if id == 'builtins' and self.manager.pyversion[0] == 2: # The __builtin__ module is called internally by mypy 'builtins' in Python 2 mode # (similar to Python 3), but the stub file is __builtin__.pyi. The reason is that @@ -883,7 +903,7 @@ def __init__(self, info: StateInfo, tree: MypyFile) -> None: # Record the dependencies. Note that the dependencies list also # contains any superpackages and we must preserve them (e.g. os for - # os.path). + # os.path). XXX NOT ACTUALLY TRUE XXX self.dependencies.extend(imp) def process(self) -> None: @@ -935,7 +955,9 @@ def process(self) -> None: # FIX remove from active state list to speed up processing - self.switch_state(TypeCheckedFile(self.info(), self.tree)) + file = TypeCheckedFile(self.info(), self.tree) + dump_to_json(file, self.manager) + self.switch_state(file) def state(self) -> int: return SEMANTICALLY_ANALYSED_STATE @@ -1124,3 +1146,167 @@ def read_with_python_encoding(path: str, pyversion: Tuple[int, int]) -> str: source_bytearray.extend(f.read()) return source_bytearray.decode(encoding) + + +# Experimental incremental loading +# TODO: Flags +# TODO: files on command line (but not __main__) + +MYPY_CACHE = '.mypy_cache' + + +def get_cache_prefix(id: str) -> str: + return os.path.join(MYPY_CACHE, *id.split('.')) + + +def get_cache_names(id: str, path: str) -> Tuple[str, str]: + prefix = get_cache_prefix(id) + is_package = os.path.basename(path).startswith('__init__.py') + if is_package: + prefix = os.path.join(prefix, '__init__') + return (prefix + '.meta.json', prefix + '.data.json') + + +def find_cache_thing(id: str, path: str, + lib_path: Tuple[str, ...], + cache: Dict[str, Optional[CacheMeta]]) -> Optional[CacheMeta]: + if id in cache: + print(' Cached', id, cache[id]) + return cache[id] # Meaning failure if cache[id] is None + meta_json, data_json = get_cache_names(id, path) + print(' Finding', id, data_json) + if not os.path.exists(meta_json): + cache[id] = None + return None + with open(meta_json, 'r') as f: + meta = json.load(f) # TODO: Errors + print(' Meta', id, meta) + if not isinstance(meta, dict): + cache[id] = None + return None + path = os.path.abspath(path) + m = CacheMeta( + meta.get('id'), + meta.get('path'), + meta.get('mtime'), + meta.get('size'), + meta.get('dependencies'), + meta.get('data_mtime'), + ) + if (m.id != id or m.path != path or + m.mtime is None or m.size is None or + m.dependencies is None or m.data_mtime is None): + cache[id] = None + return None + st = os.stat(path) # TODO: Errors + if st.st_mtime != m.mtime or st.st_size != m.size: + cache[id] = None + return None + # It's a match on (id, path, mtime, size). Check the rest. + data_st = os.stat(data_json) # TODO: Combine with exists() above + if data_st.st_mtime != m.data_mtime: + cache[id] = None + return None + # Optimistically put it in the cache to guard against cycles. + # If a dependency is bad we'll change it to None. 
+ cache[id] = m + for d_id in m.dependencies: + if d_id == id: + print(' Cycle', id, m.dependencies) + continue # Depends on itself?! + d_path = find_module(d_id, lib_path) + if d_path is None: + cache[id] = None + return None + thing = find_cache_thing(d_id, d_path, lib_path, cache) + if thing is None: + cache[id] = None + return None + print(' Found', id, meta_json) + return m + + +# TODO: Make the rest BuildManager methods? + +def load_cache_things(id: str, path: str, manager: BuildManager) -> bool: + print(' Looking', id, path) + cache = manager.loading_cache + thing = find_cache_thing(id, path, manager.lib_path, cache) + if thing is None: + return False + for d_id in thing.dependencies: + assert d_id in cache, cache + if not manager.has_module(d_id): + if not load_cache_things(d_id, cache[d_id].path, manager): + return False + _, data_json = get_cache_names(id, path) # TODO: Awkward + print(' Loading', id, data_json) + with open(data_json, 'r') as f: + data = json.load(f) + if os.path.getmtime(data_json) != thing.data_mtime: + return False + tree = MypyFile([], []) + # TODO: Load data into tree + info = StateInfo(path, id, [], manager) + new_file = TypeCheckedFile(info, tree) # TODO: New class to say "from cache" + # TODO: Set new_file.dependencies (avoid computing them) + manager.states.append(new_file) + manager.module_files[id] = path + print(' Loaded', id) + return True + + +def import_from_cache(id: str, manager: BuildManager) -> bool: + print('Import', id) + assert not manager.has_module(id) + path = find_module(id, manager.lib_path) # TODO: Share with rest of import_module() + if path is None: + return False + return load_cache_things(id, path, manager) + + +def rand_suffix(): + return '.' + binascii.hexlify(os.urandom(8)).decode('ascii') + + +def dump_to_json(file: TypeCheckedFile, manager: BuildManager) -> None: + if file.tree.is_stub: + return + id = file.id + if id == '__main__': + return + path = file.path + if path == '': + return + path = os.path.abspath(path) + print('Dumping', id, path) + st = os.stat(path) + mtime = st.st_mtime + size = st.st_size + meta_json, data_json = get_cache_names(id, path) + print(' Writing', id, meta_json, data_json) + data = file.tree.accept(serialization.SerializeVisitor()) + parent = os.path.dirname(data_json) + if not os.path.isdir(parent): + os.makedirs(parent) + assert os.path.dirname(meta_json) == parent + data_json_tmp = data_json + rand_suffix() + meta_json_tmp = meta_json + rand_suffix() + with open(data_json_tmp, 'w') as f: + json.dump(data, f) + f.write('\n') + data_mtime = os.path.getmtime(data_json_tmp) + meta = {'id': id, + 'path': path, + 'mtime': mtime, + 'size': size, + 'data_mtime': data_mtime, + 'dependencies': [d + for d in file.dependencies + if not cast(TypeCheckedFile, manager.lookup_state(d)).tree.is_stub], + } + with open(meta_json_tmp, 'w') as f: + json.dump(meta, f) + f.write('\n') + os.rename(data_json_tmp, data_json) + os.rename(meta_json_tmp, meta_json) diff --git a/mypy/nodes.py b/mypy/nodes.py index aa80666abc0d..4b45a1c9f9c4 100644 --- a/mypy/nodes.py +++ b/mypy/nodes.py @@ -1715,6 +1715,19 @@ def __str__(self) -> str: s += ' : {}'.format(self.type) return s + def serialize(self, visitor: Any) -> Any: + res = { + '.type': node_kinds[self.kind], + } + if self.mod_id != visitor.mod_id: + res['mod_id'] = self.mod_id + t = self.type + if t is None: + res['type'] = None + else: + res['type'] = str(t) + return res + class SymbolTable(Dict[str, SymbolTableNode]): def __str__(self) -> str: diff --git 
a/mypy/serialization.py b/mypy/serialization.py new file mode 100644 index 000000000000..cd5804ef38b3 --- /dev/null +++ b/mypy/serialization.py @@ -0,0 +1,41 @@ +from typing import Any + +JsonThing = Any + +from mypy.nodes import (NodeVisitor, SymbolTableNode, MypyFile, Import, ImportFrom) + + +class SerializeVisitor(NodeVisitor[JsonThing]): + + def __init__(self): # TODO + self.mod_id = None + + def visit_mypy_file(self, node: MypyFile) -> JsonThing: + save_mod_id = self.mod_id + try: + self.mod_id = node.fullname() + return { + '.tag': 'MypyFile', + 'fullname': node.fullname(), + 'path': node.path, + ## 'defs': [n.accept(self) for n in node.defs], + 'names': {k: v.serialize(self) for k, v in node.names.items()}, + 'imports': [n.accept(self) for n in node.imports], + 'is_stub': node.is_stub, + } + finally: + self.mod_id = save_mod_id + + def visit_import_from(self, node: ImportFrom) -> JsonThing: + return { + '.tag': 'ImportFrom', + 'id': node.id, + 'names': [[t[0], t[1]] for t in node.names], + 'relative': node.relative, + } + + def visit_import(self, node: Import) -> JsonThing: + return { + '.tag': 'Import', + 'ids': [[t[0], t[1]] for t in node.ids], + } From 466eb7902e98393890161a3e960d0375c1d35c15 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 12 Feb 2016 15:49:32 -0800 Subject: [PATCH 002/117] Major redo; dependency processing now uses build manager states. --- mypy/build.py | 279 ++++++++++++++++++++++-------------------- mypy/nodes.py | 20 +-- mypy/serialization.py | 6 +- 3 files changed, 164 insertions(+), 141 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index 76cfa3bababe..b3c9d331b791 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -8,6 +8,7 @@ The function build() is the main interface to this module. """ +# TODO: More consistent terminology, e.g. path/fnam, module/id, state/file import binascii import json @@ -63,20 +64,24 @@ # We aren't processing this source file yet (no associated state object). UNSEEN_STATE = 0 +# We're hopeful that we can load this from the cache. +PROBABLY_CACHED_STATE = 1 +# We've loaded the module from cache. +CACHE_LOADED_STATE = 2 # The source file has a state object, but we haven't done anything with it yet. -UNPROCESSED_STATE = 1 +UNPROCESSED_STATE = 11 # We've parsed the source file. -PARSED_STATE = 2 +PARSED_STATE = 12 # We've done the first two passes of semantic analysis. -PARTIAL_SEMANTIC_ANALYSIS_STATE = 3 +PARTIAL_SEMANTIC_ANALYSIS_STATE = 13 # We've semantically analyzed the source file. -SEMANTICALLY_ANALYSED_STATE = 4 +SEMANTICALLY_ANALYSED_STATE = 14 # We've type checked the source file (and all its dependencies). -TYPE_CHECKED_STATE = 5 +TYPE_CHECKED_STATE = 19 PYTHON_EXTENSIONS = ['.pyi', '.py'] -final_state = TYPE_CHECKED_STATE # XXX Should be FINAL_STATE +FINAL_STATE = TYPE_CHECKED_STATE def earlier_state(s: int, t: int) -> bool: @@ -200,12 +205,17 @@ def build(sources: List[BuildSource], # Construct information that describes the initial files. __main__ is the # implicit module id and the import context is empty initially ([]). 
- initial_states = [] # type: List[UnprocessedFile] + initial_states = [] # type: List[UnprocessedBase] for source in sources: - content = source.load(lib_path, pyversion) - info = StateInfo(source.effective_path, source.module, [], manager) - initial_state = UnprocessedFile(info, content) - initial_states += [initial_state] + initial_state = None # type: Optional[UnprocessedBase] + if source.module != '__main__' and source.path is not None: + initial_state = manager.maybe_make_cached_state(source.module, source.path) + # TODO: else if using '-m x' try the cache too + if initial_state is None: + content = source.load(lib_path, pyversion) + info = StateInfo(source.effective_path, source.module, [], manager) + initial_state = UnprocessedFile(info, content) + initial_states.append(initial_state) # Perform the build by sending the files as new file (UnprocessedFile is the # initial state of all files) to the manager. The manager will process the @@ -334,7 +344,8 @@ def read_program(path: str, pyversion: Tuple[int, int]) -> str: ('mtime', float), ('size', int), ('dependencies', List[str]), - ('data_mtime', float), + ('data_mtime', float), # mtime of data_json + ('data_json', str), # path of .data.json ]) @@ -404,7 +415,7 @@ def __init__(self, data_dir: str, self.missing_modules = set() # type: Set[str] self.loading_cache = {} # type: Dict[str, Optional[CacheMeta]] - def process(self, initial_states: List['UnprocessedFile']) -> BuildResult: + def process(self, initial_states: List['UnprocessedBase']) -> BuildResult: """Perform a build. The argument is a state that represents the main program @@ -412,7 +423,6 @@ def process(self, initial_states: List['UnprocessedFile']) -> BuildResult: manager object. The return values are identical to the return values of the build function. """ - # TODO: Try import_from_cache() too. self.states += initial_states for initial_state in initial_states: self.module_files[initial_state.id] = initial_state.path @@ -448,7 +458,7 @@ def process(self, initial_states: List['UnprocessedFile']) -> BuildResult: # If there were no errors, all files should have been fully processed. for s in self.states: - assert s.state() == final_state, ( + assert s.state() == FINAL_STATE, ( '{} still unprocessed in state {}'.format(s.path, s.state())) if self.errors.is_errors(): @@ -504,7 +514,7 @@ def module_state(self, name: str) -> int: """ if not self.has_module(name): return UNSEEN_STATE - state = final_state + state = FINAL_STATE fs = self.file_state(self.module_files[name]) if earlier_state(fs, state): state = fs @@ -602,6 +612,13 @@ def trace(self, message: str) -> None: if self.flags.count(VERBOSE) >= 2: print('TRACE:', message, file=sys.stderr) + def maybe_make_cached_state(self, id: str, path: str) -> Optional['UnprocessedBase']: + m = find_cache_meta(id, path, self.lib_path) + if m is None: + return None + info = StateInfo(path, id, self.errors.import_context(), self) + return ProbablyCachedFile(info, m) + def remove_cwd_prefix_from_path(p: str) -> str: """Remove current working directory prefix from p, if present. @@ -743,11 +760,64 @@ def module_not_found(self, path: str, line: int, id: str) -> None: only_once=True) -class UnprocessedFile(State): +class UnprocessedBase(State): + def __init__(self, info: StateInfo) -> None: + super().__init__(info) + self.silent = SILENT_IMPORTS in self.manager.flags + + def load_dependencies(self) -> None: + # TODO: @abstractmethod + raise NotImplementedError + + def import_module(self, id: str) -> bool: + """Schedule a module to be processed. 
+ + Add an unprocessed state object corresponding to the module to the + manager, or do nothing if the module already has a state object. + """ + if self.manager.has_module(id): + # Do nothing: already being compiled. + return True + + if id == 'builtins' and self.manager.pyversion[0] == 2: + # The __builtin__ module is called internally by mypy 'builtins' in Python 2 mode + # (similar to Python 3), but the stub file is __builtin__.pyi. The reason is that + # a lot of code hard codes 'builtins.x' and this it's easier to work it around like + # this. It also means that the implementation can mostly ignore the difference and + # just assume 'builtins' everywhere, which simplifies code. + file_id = '__builtin__' + else: + file_id = id + + path = find_module(file_id, self.manager.lib_path) + if path is None: + return False + + new_file = self.manager.maybe_make_cached_state(id, path) + if new_file is not None: + self.manager.states.append(new_file) + self.manager.module_files[id] = path + new_file.load_dependencies() + return True + + path, text = read_module_source_from_file(file_id, self.manager.lib_path, + self.manager.pyversion, self.silent) + if text is not None: + info = StateInfo(path, id, self.errors().import_context(), + self.manager) + new_file = UnprocessedFile(info, text) + self.manager.states.append(new_file) + self.manager.module_files[id] = path + new_file.load_dependencies() + return True + else: + return False + + +class UnprocessedFile(UnprocessedBase): def __init__(self, info: StateInfo, program_text: str) -> None: super().__init__(info) self.program_text = program_text - self.silent = SILENT_IMPORTS in self.manager.flags def load_dependencies(self): # Add surrounding package(s) as dependencies. @@ -826,41 +896,6 @@ def process(self) -> None: # Replace this state object with a parsed state in BuildManager. self.switch_state(ParsedFile(self.info(), tree)) - def import_module(self, id: str) -> bool: - """Schedule a module to be processed. - - Add an unprocessed state object corresponding to the module to the - manager, or do nothing if the module already has a state object. - """ - if self.manager.has_module(id): - # Do nothing: already being compiled. - return True - - if import_from_cache(id, self.manager): - return True - - if id == 'builtins' and self.manager.pyversion[0] == 2: - # The __builtin__ module is called internally by mypy 'builtins' in Python 2 mode - # (similar to Python 3), but the stub file is __builtin__.pyi. The reason is that - # a lot of code hard codes 'builtins.x' and this it's easier to work it around like - # this. It also means that the implementation can mostly ignore the difference and - # just assume 'builtins' everywhere, which simplifies code. - file_id = '__builtin__' - else: - file_id = id - path, text = read_module_source_from_file(file_id, self.manager.lib_path, - self.manager.pyversion, self.silent) - if text is not None: - info = StateInfo(path, id, self.errors().import_context(), - self.manager) - new_file = UnprocessedFile(info, text) - self.manager.states.append(new_file) - self.manager.module_files[id] = path - new_file.load_dependencies() - return True - else: - return False - def parse(self, source_text: Union[str, bytes], fnam: str) -> MypyFile: """Parse the source of a file with the given name. 
@@ -881,6 +916,55 @@ def state(self) -> int: return UNPROCESSED_STATE +class ProbablyCachedFile(UnprocessedBase): + def __init__(self, info: StateInfo, meta: CacheMeta) -> None: + super().__init__(info) + self.meta = meta + + def load_dependencies(self): + for dep_id in self.meta.dependencies: + if self.import_module(dep_id): + self.dependencies.append(dep_id) + + def process(self) -> None: + # TODO: Errors + with open(self.meta.data_json) as f: + data = json.load(f) + file = None # type: State + if os.path.getmtime(self.meta.data_json) == self.meta.data_mtime: + file = CacheLoadedFile(self.info(), self.meta, data) + else: + # Didn't work -- construct an UnprocessedFile. + path, text = read_module_source_from_file(self.id, + self.manager.lib_path, + self.manager.pyversion, + SILENT_IMPORTS in self.manager.flags) + # TODO: Errors + assert text is not None + assert path == os.path.abspath(self.path), (path, self.path) + file = UnprocessedFile(self.info(), text) + self.switch_state(file) + + def state(self) -> int: + return PROBABLY_CACHED_STATE + + +class CacheLoadedFile(State): + def __init__(self, info: StateInfo, meta: CacheMeta, data: Any) -> None: + super().__init__(info) + self.meta = meta + self.dependencies += meta.dependencies + self.data = data + + def process(self) -> None: + tree = serialization.load_tree(self.data) + file = TypeCheckedFile(self.info(), tree) + self.switch_state(file) + + def state(self) -> int: + return CACHE_LOADED_STATE + + class ParsedFile(State): tree = None # type: MypyFile @@ -1155,34 +1239,23 @@ def read_with_python_encoding(path: str, pyversion: Tuple[int, int]) -> str: MYPY_CACHE = '.mypy_cache' -def get_cache_prefix(id: str) -> str: - return os.path.join(MYPY_CACHE, *id.split('.')) - - def get_cache_names(id: str, path: str) -> Tuple[str, str]: - prefix = get_cache_prefix(id) + prefix = os.path.join(MYPY_CACHE, *id.split('.')) is_package = os.path.basename(path).startswith('__init__.py') if is_package: prefix = os.path.join(prefix, '__init__') return (prefix + '.meta.json', prefix + '.data.json') -def find_cache_thing(id: str, path: str, - lib_path: Tuple[str, ...], - cache: Dict[str, Optional[CacheMeta]]) -> Optional[CacheMeta]: - if id in cache: - print(' Cached', id, cache[id]) - return cache[id] # Meaning failure if cache[id] is None +def find_cache_meta(id: str, path: str, lib_path: Tuple[str, ...]) -> Optional[CacheMeta]: meta_json, data_json = get_cache_names(id, path) print(' Finding', id, data_json) if not os.path.exists(meta_json): - cache[id] = None return None with open(meta_json, 'r') as f: meta = json.load(f) # TODO: Errors print(' Meta', id, meta) if not isinstance(meta, dict): - cache[id] = None return None path = os.path.abspath(path) m = CacheMeta( @@ -1192,79 +1265,25 @@ def find_cache_thing(id: str, path: str, meta.get('size'), meta.get('dependencies'), meta.get('data_mtime'), + data_json, ) if (m.id != id or m.path != path or m.mtime is None or m.size is None or m.dependencies is None or m.data_mtime is None): - cache[id] = None return None + # TODO: Share stat() outcome with find_module() st = os.stat(path) # TODO: Errors if st.st_mtime != m.mtime or st.st_size != m.size: - cache[id] = None return None - # It's a match on (id, path, mtime, size). Check the rest. - data_st = os.stat(data_json) # TODO: Combine with exists() above - if data_st.st_mtime != m.data_mtime: - cache[id] = None + # It's a match on (id, path, mtime, size). + # Check data_json; assume if its mtime matches it's good. 
+ # TODO: stat() errors + if os.path.getmtime(data_json) != m.data_mtime: return None - # Optimistically put it in the cache to guard against cycles. - # If a dependency is bad we'll change it to None. - cache[id] = m - for d_id in m.dependencies: - if d_id == id: - print(' Cycle', id, m.dependencies) - continue # Depends on itself?! - d_path = find_module(d_id, lib_path) - if d_path is None: - cache[id] = None - return None - thing = find_cache_thing(d_id, d_path, lib_path, cache) - if thing is None: - cache[id] = None - return None - print(' Found', id, meta_json) + print(' Found', id, meta_json, m) return m -# TODO: Make the rest BuildManager methods? - -def load_cache_things(id: str, path: str, manager: BuildManager) -> bool: - print(' Looking', id, path) - cache = manager.loading_cache - thing = find_cache_thing(id, path, manager.lib_path, cache) - if thing is None: - return False - for d_id in thing.dependencies: - assert d_id in cache, cache - if not manager.has_module(d_id): - if not load_cache_things(d_id, cache[d_id].path, manager): - return False - _, data_json = get_cache_names(id, path) # TODO: Awkward - print(' Loading', id, data_json) - with open(data_json, 'r') as f: - data = json.load(f) - if os.path.getmtime(data_json) != thing.data_mtime: - return False - tree = MypyFile([], []) - # TODO: Load data into tree - info = StateInfo(path, id, [], manager) - new_file = TypeCheckedFile(info, tree) # TODO: New class to say "from cache" - # TODO: Set new_file.dependencies (avoid computing them) - manager.states.append(new_file) - manager.module_files[id] = path - print(' Loaded', id) - return True - - -def import_from_cache(id: str, manager: BuildManager) -> bool: - print('Import', id) - assert not manager.has_module(id) - path = find_module(id, manager.lib_path) # TODO: Share with rest of import_module() - if path is None: - return False - return load_cache_things(id, path, manager) - - def rand_suffix(): return '.' 
+ binascii.hexlify(os.urandom(8)).decode('ascii') @@ -1280,7 +1299,7 @@ def dump_to_json(file: TypeCheckedFile, manager: BuildManager) -> None: return path = os.path.abspath(path) print('Dumping', id, path) - st = os.stat(path) + st = os.stat(path) # TODO: Errors mtime = st.st_mtime size = st.st_size meta_json, data_json = get_cache_names(id, path) @@ -1293,7 +1312,7 @@ def dump_to_json(file: TypeCheckedFile, manager: BuildManager) -> None: data_json_tmp = data_json + rand_suffix() meta_json_tmp = meta_json + rand_suffix() with open(data_json_tmp, 'w') as f: - json.dump(data, f) + json.dump(data, f, indent=2, sort_keys=True) f.write('\n') data_mtime = os.path.getmtime(data_json_tmp) meta = {'id': id, @@ -1306,7 +1325,7 @@ def dump_to_json(file: TypeCheckedFile, manager: BuildManager) -> None: if not cast(TypeCheckedFile, manager.lookup_state(d)).tree.is_stub], } with open(meta_json_tmp, 'w') as f: - json.dump(meta, f) + json.dump(meta, f, indent=2, sort_keys=True) f.write('\n') os.rename(data_json_tmp, data_json) os.rename(meta_json_tmp, meta_json) diff --git a/mypy/nodes.py b/mypy/nodes.py index 4b45a1c9f9c4..b4321387e39e 100644 --- a/mypy/nodes.py +++ b/mypy/nodes.py @@ -1717,15 +1717,20 @@ def __str__(self) -> str: def serialize(self, visitor: Any) -> Any: res = { - '.type': node_kinds[self.kind], - } + '.tag': node_kinds[self.kind], + } # type: Dict[str, Any] + if self.kind == MODULE_REF: + assert isinstance(self.node, MypyFile), self.node + res['module'] = self.node.fullname() if self.mod_id != visitor.mod_id: res['mod_id'] = self.mod_id + if self.tvar_id != 0: + res['tvar_id'] = self.tvar_id t = self.type - if t is None: - res['type'] = None - else: + if t is not None: res['type'] = str(t) + if self.node is not None and self.kind != MODULE_REF: + res['node'] = str(self.node) return res @@ -1747,11 +1752,6 @@ def __str__(self) -> str: return '\n'.join(a) -def clean_up(s: str) -> str: - # TODO remove - return re.sub('.*::', '', s) - - def function_type(func: FuncBase, fallback: 'mypy.types.Instance') -> 'mypy.types.FunctionLike': if func.type: assert isinstance(func.type, mypy.types.FunctionLike) diff --git a/mypy/serialization.py b/mypy/serialization.py index cd5804ef38b3..514c9c852dd0 100644 --- a/mypy/serialization.py +++ b/mypy/serialization.py @@ -19,7 +19,7 @@ def visit_mypy_file(self, node: MypyFile) -> JsonThing: 'fullname': node.fullname(), 'path': node.path, ## 'defs': [n.accept(self) for n in node.defs], - 'names': {k: v.serialize(self) for k, v in node.names.items()}, + 'names': {k: v.serialize(self) for k, v in node.names.items() if k != '__builtins__'}, # TODO: Move to SymbolTable. 'imports': [n.accept(self) for n in node.imports], 'is_stub': node.is_stub, } @@ -39,3 +39,7 @@ def visit_import(self, node: Import) -> JsonThing: '.tag': 'Import', 'ids': [[t[0], t[1]] for t in node.ids], } + + +def load_tree(data: Any) -> MypyFile: + return MypyFile([], []) # TODO From 895fe545e228635b5854e84712bc167ca595c83e Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Sat, 13 Feb 2016 09:24:13 -0800 Subject: [PATCH 003/117] Start on serialization. 
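
At this point only the module symbol table is serialized: the new
SymbolTable.serialize() walks its entries, and each SymbolTableNode
becomes a small dict tagged with '.tag' plus a stringified type.
Deserialization is still a stub (load_tree() just returns an empty
MypyFile).  As a rough illustration of how the dump side is driven
(names as in dump_to_json; not the final format):

    data = file.tree.accept(serialization.SerializeVisitor())
    json.dump(data, f, indent=2, sort_keys=True)   # -> <id>.data.json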
--- mypy/build.py | 2 ++ mypy/nodes.py | 14 ++++++++++++-- mypy/serialization.py | 4 +--- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index b3c9d331b791..17a89e6d1221 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -945,6 +945,8 @@ def process(self) -> None: file = UnprocessedFile(self.info(), text) self.switch_state(file) + # TODO: is_ready() that waits for dependencies to be out of limbo + def state(self) -> int: return PROBABLY_CACHED_STATE diff --git a/mypy/nodes.py b/mypy/nodes.py index b4321387e39e..f6e7a7d12039 100644 --- a/mypy/nodes.py +++ b/mypy/nodes.py @@ -37,7 +37,7 @@ def get_line(self) -> int: pass MDEF = 2 # type: int MODULE_REF = 3 # type: int # Type variable declared using TypeVar(...) has kind UNBOUND_TVAR. It's not -# valid as a type. A type variable is valid as a type (kind TVAR) within +# valid as a type. A type variable is valid as a type (kind BOUND_TVAR) within # (1) a generic class that uses the type variable as a type argument or # (2) a generic function that refers to the type variable in its signature. UNBOUND_TVAR = 4 # type: int @@ -46,6 +46,7 @@ def get_line(self) -> int: pass # Placeholder for a name imported via 'from ... import'. Second phase of # semantic will replace this the actual imported reference. This is # needed so that we can detect whether a name has been imported during +# XXX what? UNBOUND_IMPORTED = 7 # type: int @@ -1715,7 +1716,7 @@ def __str__(self) -> str: s += ' : {}'.format(self.type) return s - def serialize(self, visitor: Any) -> Any: + def serialize(self, visitor: NodeVisitor[Any]) -> Any: res = { '.tag': node_kinds[self.kind], } # type: Dict[str, Any] @@ -1751,6 +1752,15 @@ def __str__(self) -> str: a[-1] += ')' return '\n'.join(a) + def serialize(self, visitor: NodeVisitor[Any]) -> Dict[str, Any]: + res = {} + for name, node in sorted(self.items()): # XXX: Don't sort + if name != '__builtins__': + ser = node.serialize(visitor) + print('%20s : %s -- %s' % (name, ser['.tag'], repr(ser.get('type')))) # XXX + res[name] = ser + return res + def function_type(func: FuncBase, fallback: 'mypy.types.Instance') -> 'mypy.types.FunctionLike': if func.type: diff --git a/mypy/serialization.py b/mypy/serialization.py index 514c9c852dd0..61be1bb7fd18 100644 --- a/mypy/serialization.py +++ b/mypy/serialization.py @@ -18,9 +18,7 @@ def visit_mypy_file(self, node: MypyFile) -> JsonThing: '.tag': 'MypyFile', 'fullname': node.fullname(), 'path': node.path, - ## 'defs': [n.accept(self) for n in node.defs], - 'names': {k: v.serialize(self) for k, v in node.names.items() if k != '__builtins__'}, # TODO: Move to SymbolTable. - 'imports': [n.accept(self) for n in node.imports], + 'names': node.names.serialize(self), 'is_stub': node.is_stub, } finally: From 9ab40203558145d897302164c57c01187b3043b7 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Sun, 14 Feb 2016 17:19:46 -0800 Subject: [PATCH 004/117] Totally different approach to serialization. 
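
The SerializeVisitor approach is dropped; instead each node and type
carries its own serialize() method returning JSON-friendly dicts
tagged with a '.class' key, plus a deserialize() classmethod that
dispatches on that tag (SymbolNode.deserialize, Type.deserialize).
Only a handful of classes are covered so far; the rest still raise
NotImplementedError.  The intended round trip, sketched with the
names used in build.py:

    data = file.tree.serialize()        # '.class'-tagged dicts and lists
    json.dump(data, f, indent=2, sort_keys=True)    # -> <id>.data.json
    ...
    tree = MypyFile.deserialize(data)   # done by CacheLoadedFile.process()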
--- mypy/build.py | 5 +- mypy/nodes.py | 162 ++++++++++++++++++++++++++++++++++++------ mypy/serialization.py | 43 ----------- mypy/types.py | 71 +++++++++++++++++- 4 files changed, 212 insertions(+), 69 deletions(-) delete mode 100644 mypy/serialization.py diff --git a/mypy/build.py b/mypy/build.py index 17a89e6d1221..275fe60719ee 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -33,7 +33,6 @@ from mypy.report import Reports from mypy import defaults from mypy import moduleinfo -from mypy import serialization from mypy import util @@ -959,7 +958,7 @@ def __init__(self, info: StateInfo, meta: CacheMeta, data: Any) -> None: self.data = data def process(self) -> None: - tree = serialization.load_tree(self.data) + tree = MypyFile.deserialize(self.data) file = TypeCheckedFile(self.info(), tree) self.switch_state(file) @@ -1306,7 +1305,7 @@ def dump_to_json(file: TypeCheckedFile, manager: BuildManager) -> None: size = st.st_size meta_json, data_json = get_cache_names(id, path) print(' Writing', id, meta_json, data_json) - data = file.tree.accept(serialization.SerializeVisitor()) + data = file.tree.serialize() parent = os.path.dirname(data_json) if not os.path.isdir(parent): os.makedirs(parent) diff --git a/mypy/nodes.py b/mypy/nodes.py index f6e7a7d12039..5ee8b4477e67 100644 --- a/mypy/nodes.py +++ b/mypy/nodes.py @@ -27,6 +27,8 @@ def get_line(self) -> int: pass T = TypeVar('T') +JsonDict = Dict[str, Any] + # Symbol table node kinds # @@ -64,6 +66,7 @@ def get_line(self) -> int: pass TYPE_ALIAS: 'TypeAlias', UNBOUND_IMPORTED: 'UnboundImported', } +inverse_node_kinds = {_kind: _name for _name, _kind in node_kinds.items()} implicit_module_attrs = {'__name__': '__builtins__.str', @@ -122,6 +125,21 @@ def name(self) -> str: pass @abstractmethod def fullname(self) -> str: pass + # @abstractmethod # TODO + def serialize(self) -> JsonDict: + raise NotImplementedError('Cannot serialize {} instance'.format(self.__class__.__name__)) + + @classmethod + def deserialize(cls, data: JsonDict) -> 'SymbolNode': + classname = data['.class'] + if classname == 'Var': + return Var.deserialize(data) + if classname == 'TypeInfo': + return TypeInfo.deserialize(data) + if classname == 'FuncDef': + return FuncDef.deserialize(data) + raise RuntimeError('unexpected .class {}'.format(classname)) + class MypyFile(SymbolNode): """The abstract syntax tree of a single source file.""" @@ -175,6 +193,18 @@ def is_package_init_file(self) -> bool: return not (self.path is None) and len(self.path) != 0 \ and os.path.basename(self.path).startswith('__init__.') + def serialize(self) -> JsonDict: + return {'.class': 'MypyFile', + 'names': self.names.serialize(), + } + + @classmethod + def deserialize(cls, data: JsonDict) -> 'MypyFile': + assert data['.class'] == 'MypyFile', data + tree = MypyFile([], []) + tree.names = SymbolTable.deserialize(data['names']) + return tree + class ImportBase(Node): """Base class for all import statements.""" @@ -312,6 +342,22 @@ def set_line(self, target: Union[Token, Node, int]) -> Node: self.initialization_statement.set_line(self.line) self.initialization_statement.lvalues[0].set_line(self.line) + def serialize(self) -> JsonDict: + res = {'.class': 'Argument'} # type: JsonDict + res['variable'] = self.variable.serialize() + # TODO: type_annotation + # TODO: initializer + res['kind'] = self.kind + return res + + @classmethod + def deserialize(cls, data: JsonDict) -> 'Argument': + assert data['.class'] == 'Argument' + return Argument(Var.deserialize(data['variable']), + None, # TODO: type_annotation + 
None, # TODO: initializer + kind=data['kind']) + class FuncItem(FuncBase): arguments = [] # type: List[Argument] @@ -385,6 +431,21 @@ def accept(self, visitor: NodeVisitor[T]) -> T: def is_constructor(self) -> bool: return self.info is not None and self._name == '__init__' + def serialize(self) -> JsonDict: + return {'.class': 'FuncDef', + 'name': self._name, + 'arguments': [a.serialize() for a in self.arguments], + # TODO: type + } + + @classmethod + def deserialize(cls, data: JsonDict) -> 'FuncDef': + assert data['.class'] == 'FuncDef' + body = Block([]) + return FuncDef(data['name'], + [Argument.deserialize(a) for a in data['arguments']], + body) + class Decorator(SymbolNode): """A decorated function. @@ -450,6 +511,27 @@ def fullname(self) -> str: def accept(self, visitor: NodeVisitor[T]) -> T: return visitor.visit_var(self) + def serialize(self) -> JsonDict: + res = {'.class': 'Var', + 'name': self._name, + } # type: JsonDict + if self._fullname is not None: + res['fullname'] = self._fullname + if self.type is not None: + res['type'] = self.type.serialize() + return res + + @classmethod + def deserialize(cls, data: JsonDict) -> 'Var': + assert data['.class'] == 'Var' + name = data['name'] + type = None + if 'type' in data: + type = mypy.types.Type.deserialize(data['type']) + v = Var(name, type) + v._fullname = data.get('fullname') + return v + class ClassDef(Node): """Class definition""" @@ -1651,6 +1733,22 @@ def __str__(self) -> str: ('Names', sorted(self.names.keys()))], 'TypeInfo') + def serialize(self) -> JsonDict: + # TODO (esp. names) + res = {'.class': 'TypeInfo', + 'name': self.name(), + 'fullname': self.fullname(), + } + return res + + @classmethod + def deserialize(cls, data: JsonDict) -> 'TypeInfo': + assert data['.class'] == 'TypeInfo' + names = SymbolTable() # TODO + cdef = ClassDef(data['name'], Block([])) + cdef.fullname = data['fullname'] + return TypeInfo(names, cdef) + class SymbolTableNode: # Kind of node. Possible values: @@ -1716,24 +1814,36 @@ def __str__(self) -> str: s += ' : {}'.format(self.type) return s - def serialize(self, visitor: NodeVisitor[Any]) -> Any: - res = { - '.tag': node_kinds[self.kind], - } # type: Dict[str, Any] + def serialize(self) -> JsonDict: + res = {'.class': 'SymbolTableNode', + 'kind': node_kinds[self.kind], + } # type: JsonDict if self.kind == MODULE_REF: - assert isinstance(self.node, MypyFile), self.node - res['module'] = self.node.fullname() - if self.mod_id != visitor.mod_id: - res['mod_id'] = self.mod_id - if self.tvar_id != 0: - res['tvar_id'] = self.tvar_id - t = self.type - if t is not None: - res['type'] = str(t) - if self.node is not None and self.kind != MODULE_REF: - res['node'] = str(self.node) + res['module_ref'] = self.node.fullname() + else: + typ = self.type + if typ is not None: + # TODO: Shorten simple type references (e.g. builtins.str) + res['type'] = typ.serialize() + else: + if self.node is not None: + res['node'] = self.node.serialize() return res + @classmethod + def deserialize(cls, data: JsonDict) -> 'SymbolTableNode': + assert data['.class'] == 'SymbolTableNode' + kind = inverse_node_kinds[data['kind']] + # NOTE: MODULE_REF needs to be fixed up in a later pass. 
+ typ = None + node = None + if 'type' in data: + typ = mypy.types.Type.deserialize(data['type']) + if 'node' in data: + node = SymbolNode.deserialize(data['node']) + # TODO: Rest + return SymbolTableNode(kind, node, typ=typ) + class SymbolTable(Dict[str, SymbolTableNode]): def __str__(self) -> str: @@ -1752,15 +1862,23 @@ def __str__(self) -> str: a[-1] += ')' return '\n'.join(a) - def serialize(self, visitor: NodeVisitor[Any]) -> Dict[str, Any]: - res = {} - for name, node in sorted(self.items()): # XXX: Don't sort - if name != '__builtins__': - ser = node.serialize(visitor) - print('%20s : %s -- %s' % (name, ser['.tag'], repr(ser.get('type')))) # XXX - res[name] = ser + def serialize(self) -> JsonDict: + res = {'.class': 'SymbolTable'} # type: JsonDict + for key, value in self.items(): + if key == '__builtins__' or not value.module_public: + continue + res[key] = value.serialize() return res + @classmethod + def deserialize(cls, data: JsonDict) -> 'SymbolTable': + assert data['.class'] == 'SymbolTable' + st = SymbolTable() + for key, value in data.items(): + if key != '.class': + st[key] = SymbolTableNode.deserialize(value) + return st + def function_type(func: FuncBase, fallback: 'mypy.types.Instance') -> 'mypy.types.FunctionLike': if func.type: diff --git a/mypy/serialization.py b/mypy/serialization.py deleted file mode 100644 index 61be1bb7fd18..000000000000 --- a/mypy/serialization.py +++ /dev/null @@ -1,43 +0,0 @@ -from typing import Any - -JsonThing = Any - -from mypy.nodes import (NodeVisitor, SymbolTableNode, MypyFile, Import, ImportFrom) - - -class SerializeVisitor(NodeVisitor[JsonThing]): - - def __init__(self): # TODO - self.mod_id = None - - def visit_mypy_file(self, node: MypyFile) -> JsonThing: - save_mod_id = self.mod_id - try: - self.mod_id = node.fullname() - return { - '.tag': 'MypyFile', - 'fullname': node.fullname(), - 'path': node.path, - 'names': node.names.serialize(self), - 'is_stub': node.is_stub, - } - finally: - self.mod_id = save_mod_id - - def visit_import_from(self, node: ImportFrom) -> JsonThing: - return { - '.tag': 'ImportFrom', - 'id': node.id, - 'names': [[t[0], t[1]] for t in node.names], - 'relative': node.relative, - } - - def visit_import(self, node: Import) -> JsonThing: - return { - '.tag': 'Import', - 'ids': [[t[0], t[1]] for t in node.ids], - } - - -def load_tree(data: Any) -> MypyFile: - return MypyFile([], []) # TODO diff --git a/mypy/types.py b/mypy/types.py index d220a1252639..7baae9cb2757 100644 --- a/mypy/types.py +++ b/mypy/types.py @@ -1,7 +1,7 @@ """Classes for representing mypy types.""" from abc import abstractmethod -from typing import Any, TypeVar, List, Tuple, cast, Generic, Set, Sequence, Optional +from typing import Any, TypeVar, Dict, List, Tuple, cast, Generic, Set, Sequence, Optional import mypy.nodes from mypy.nodes import INVARIANT, SymbolNode @@ -9,6 +9,8 @@ T = TypeVar('T') +JsonDict = Dict[str, Any] + class Type(mypy.nodes.Context): """Abstract base class for all types.""" @@ -27,6 +29,20 @@ def accept(self, visitor: 'TypeVisitor[T]') -> T: def __repr__(self) -> str: return self.accept(TypeStrVisitor()) + def serialize(self) -> JsonDict: + raise NotImplementedError('Cannot serialize {} instance'.format(self.__class__.__name__)) + + @classmethod + def deserialize(cls, data: JsonDict) -> 'Type': + classname = data['.class'] + if classname == 'AnyType': + return AnyType.deserialize(data) + if classname == 'Instance': + return Instance.deserialize(data) + if classname == 'CallableType': + return 
CallableType.deserialize(data) + raise RuntimeError('unexpected .class {}'.format(classname)) + class TypeVarDef(mypy.nodes.Context): """Definition of a single type variable.""" @@ -105,6 +121,14 @@ class AnyType(Type): def accept(self, visitor: 'TypeVisitor[T]') -> T: return visitor.visit_any(self) + def serialize(self) -> JsonDict: + return {'.class': 'AnyType'} + + @classmethod + def deserialize(cls, data: JsonDict) -> 'AnyType': + assert data['.class'] == 'AnyType' + return AnyType() + class Void(Type): """The return type 'None'. @@ -193,6 +217,28 @@ def __init__(self, typ: mypy.nodes.TypeInfo, args: List[Type], def accept(self, visitor: 'TypeVisitor[T]') -> T: return visitor.visit_instance(self) + def serialize(self) -> JsonDict: + res = {'.class': 'Instance', + } # type: JsonDict + if self.type is not None: + res['type'] = self.type.serialize() + if self.args: + res['args'] = [arg.serialize() for arg in self.args] + return res + + @classmethod + def deserialize(cls, data: JsonDict) -> 'Instance': + assert data['.class'] == 'Instance' + typ = None + if 'type' in data: + typ = mypy.nodes.TypeInfo.deserialize(data['type']) + args = [] # type: List[Type] + if 'args' in data: + args_list = data['args'] + assert isinstance(args_list, list) + args = [Type.deserialize(arg) for arg in args_list] + return Instance(typ, args) + class TypeVarType(Type): """A type variable type. @@ -374,6 +420,29 @@ def type_var_ids(self) -> List[int]: a.append(tv.id) return a + def serialize(self) -> JsonDict: + return {'.class': 'CallableType', + 'arg_types': [t.serialize() for t in self.arg_types], + 'arg_kinds': self.arg_kinds, + 'arg_names': self.arg_names, + 'ret_type': self.ret_type.serialize(), + 'fallback': self.fallback.serialize(), + 'name': self.name, + # TODO: definition, variables, bound_vars, is_ellipsis_args + } + + @classmethod + def deserialize(cls, data: JsonDict) -> 'CallableType': + assert data['.class'] == 'CallableType' + return CallableType([Type.deserialize(t) for t in data['arg_types']], + data['arg_kinds'], + data['arg_names'], + Type.deserialize(data['ret_type']), + Instance.deserialize(data['fallback']), + name=data.get('name'), + # TODO: definition, variables, bound_vars, is_ellipsis_args + ) + class Overloaded(FunctionLike): """Overloaded function type T1, ... Tn, where each Ti is CallableType. From 68b43260bcc4aa5df5f1d1d51ed3b73ae1c66b1b Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Mon, 15 Feb 2016 17:25:43 -0800 Subject: [PATCH 005/117] Improved log()/trace() API. Improved handling cached dependencies. --- mypy/build.py | 240 +++++++++++++++++++++++++++++--------------------- 1 file changed, 139 insertions(+), 101 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index 275fe60719ee..a3dd0e7553cb 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -441,7 +441,7 @@ def process(self, initial_states: List['UnprocessedBase']) -> BuildResult: break # Potentially output some debug information. - self.trace('next {} ({})'.format(next.path, next.state())) + self.trace('next {}={} ({})'.format(next.id, next.path, next.state())) # Set the import context for reporting error messages correctly. 
self.errors.set_import_context(next.import_context) @@ -603,6 +603,13 @@ def final_passes(self, files: List[MypyFile], else: raise RuntimeError('Unsupported target %d' % self.target) + def maybe_make_cached_state(self, id: str, path: str) -> Optional['UnprocessedBase']: + m = find_cache_meta(id, path, self) + if m is None: + return None + info = StateInfo(path, id, self.errors.import_context(), self) + return ProbablyCachedFile(info, m) + def log(self, message: str) -> None: if VERBOSE in self.flags: print('LOG:', message, file=sys.stderr) @@ -611,13 +618,6 @@ def trace(self, message: str) -> None: if self.flags.count(VERBOSE) >= 2: print('TRACE:', message, file=sys.stderr) - def maybe_make_cached_state(self, id: str, path: str) -> Optional['UnprocessedBase']: - m = find_cache_meta(id, path, self.lib_path) - if m is None: - return None - info = StateInfo(path, id, self.errors.import_context(), self) - return ProbablyCachedFile(info, m) - def remove_cwd_prefix_from_path(p: str) -> str: """Remove current working directory prefix from p, if present. @@ -676,6 +676,7 @@ class State: """ # The StateInfo attributes are duplicated here for convenience. + # TODO: Why not just inherit from StateInfo? path = '' id = '' import_context = None # type: List[Tuple[str, int]] @@ -708,8 +709,19 @@ def is_ready(self) -> bool: return True def num_incomplete_deps(self) -> int: - """Return the number of dependencies that are ready but incomplete.""" - return 0 # Does not matter in this state + """Return the number of dependencies that are incomplete. + + Here complete means that their state is *later* than this module. + Cyclic dependencies are omitted to break cycles forcibly (and somewhat + arbitrarily). + """ + incomplete = 0 + for module in self.dependencies: + state = self.manager.module_state(module) + if (not earlier_state(self.state(), state) and + not self.manager.is_dep(module, self.id)): + incomplete += 1 + return incomplete def state(self) -> int: raise RuntimeError('Not implemented') @@ -719,9 +731,13 @@ def switch_state(self, state_object: 'State') -> None: Also notify the manager. """ + # TODO: Make this a method on the manager? for i in range(len(self.manager.states)): if self.manager.states[i].path == state_object.path: self.manager.states[i] = state_object + self.manager.trace('switch {}={} ({})'.format(state_object.id, + state_object.path, + state_object.state())) return raise RuntimeError('State for {} not found'.format(state_object.path)) @@ -766,6 +782,15 @@ def __init__(self, info: StateInfo) -> None: def load_dependencies(self) -> None: # TODO: @abstractmethod + """Finish initialization by adding dependencies. + + This should call import_module() for each dependency and if + that succeeds append it to self.dependencies. + + This cannot be done in __init__() because the new state must + first be added to the manager, so that cyclic imports don't + cause an infinite regress. + """ raise NotImplementedError def import_module(self, id: str) -> bool: @@ -794,6 +819,7 @@ def import_module(self, id: str) -> bool: new_file = self.manager.maybe_make_cached_state(id, path) if new_file is not None: + # TODO: Refactor so this manager update dance only occurs once? 
self.manager.states.append(new_file) self.manager.module_files[id] = path new_file.load_dependencies() @@ -813,13 +839,82 @@ def import_module(self, id: str) -> bool: return False +class ProbablyCachedFile(UnprocessedBase): + def __init__(self, info: StateInfo, meta: CacheMeta) -> None: + super().__init__(info) + self.meta = meta + + def load_dependencies(self): + for dep_id in self.meta.dependencies: + if self.import_module(dep_id): + self.dependencies.append(dep_id) + + def process(self) -> None: + ok = True + for dep_id in self.dependencies: + state_obj = self.manager.lookup_state(dep_id) + if (isinstance(state_obj, CacheLoadedFile) or + isinstance(state_obj, ProbablyCachedFile)): + continue + if isinstance(state_obj, TypeCheckedFile) and state_obj.meta is not None: + continue + self.manager.log('Abandoning cached data for {} ' + 'because {} changed ({})'.format(self.id, state_obj.id, + state_obj.__class__.__name__)) + ok = False + break + if ok: + # TODO: Errors + with open(self.meta.data_json) as f: + data = json.load(f) + if os.path.getmtime(self.meta.data_json) != self.meta.data_mtime: + self.manager.log('Abandoning cached data for {} ' + 'due to race condition'.format(self.id)) + ok = False + file = None # type: State + if ok: + file = CacheLoadedFile(self.info(), self.meta, data) + else: + # Didn't work -- construct an UnprocessedFile. + path, text = read_module_source_from_file(self.id, + self.manager.lib_path, + self.manager.pyversion, + SILENT_IMPORTS in self.manager.flags) + # TODO: Errors + assert text is not None + assert path == os.path.abspath(self.path), (path, self.path) + file = UnprocessedFile(self.info(), text) + self.switch_state(file) + + # TODO: is_ready() that waits for dependencies to be out of limbo + + def state(self) -> int: + return PROBABLY_CACHED_STATE + + +class CacheLoadedFile(State): + def __init__(self, info: StateInfo, meta: CacheMeta, data: Any) -> None: + super().__init__(info) + self.meta = meta + self.dependencies.extend(meta.dependencies) + self.data = data + + def process(self) -> None: + tree = MypyFile.deserialize(self.data) + file = TypeCheckedFile(self.info(), tree, self.meta) + self.switch_state(file) + + def state(self) -> int: + return CACHE_LOADED_STATE + + class UnprocessedFile(UnprocessedBase): def __init__(self, info: StateInfo, program_text: str) -> None: super().__init__(info) self.program_text = program_text def load_dependencies(self): - # Add surrounding package(s) as dependencies. + # Add surrounding (ancestor) package(s) as dependencies. for p in super_packages(self.id): if p in self.manager.missing_modules: continue @@ -915,76 +1010,31 @@ def state(self) -> int: return UNPROCESSED_STATE -class ProbablyCachedFile(UnprocessedBase): - def __init__(self, info: StateInfo, meta: CacheMeta) -> None: - super().__init__(info) - self.meta = meta - - def load_dependencies(self): - for dep_id in self.meta.dependencies: - if self.import_module(dep_id): - self.dependencies.append(dep_id) - - def process(self) -> None: - # TODO: Errors - with open(self.meta.data_json) as f: - data = json.load(f) - file = None # type: State - if os.path.getmtime(self.meta.data_json) == self.meta.data_mtime: - file = CacheLoadedFile(self.info(), self.meta, data) - else: - # Didn't work -- construct an UnprocessedFile. 
- path, text = read_module_source_from_file(self.id, - self.manager.lib_path, - self.manager.pyversion, - SILENT_IMPORTS in self.manager.flags) - # TODO: Errors - assert text is not None - assert path == os.path.abspath(self.path), (path, self.path) - file = UnprocessedFile(self.info(), text) - self.switch_state(file) - - # TODO: is_ready() that waits for dependencies to be out of limbo - - def state(self) -> int: - return PROBABLY_CACHED_STATE - - -class CacheLoadedFile(State): - def __init__(self, info: StateInfo, meta: CacheMeta, data: Any) -> None: - super().__init__(info) - self.meta = meta - self.dependencies += meta.dependencies - self.data = data - - def process(self) -> None: - tree = MypyFile.deserialize(self.data) - file = TypeCheckedFile(self.info(), tree) - self.switch_state(file) - - def state(self) -> int: - return CACHE_LOADED_STATE - - class ParsedFile(State): tree = None # type: MypyFile + meta = None # type: Optional[CacheMeta] - def __init__(self, info: StateInfo, tree: MypyFile) -> None: + def __init__(self, info: StateInfo, tree: MypyFile, + meta: CacheMeta = None) -> None: super().__init__(info) self.tree = tree + self.meta = meta - # Build a list all directly imported moules (dependencies). - imp = [] # type: List[str] - for id, line in self.manager.all_imported_modules_in_file(tree): - # Omit missing modules, as otherwise we could not type check - # programs with missing modules. - if id not in self.manager.missing_modules and id != self.id: - imp.append(id) - if self.id != 'builtins': - imp.append('builtins') + if meta is not None: + imp = meta.dependencies + else: + # Build a list all directly imported moules (dependencies). + imp = [] + for id, line in self.manager.all_imported_modules_in_file(tree): + # Omit missing modules, as otherwise we could not type check + # programs with missing modules. + if id not in self.manager.missing_modules and id != self.id: + imp.append(id) + if self.id != 'builtins': + imp.append('builtins') if imp != []: - self.manager.trace('{} dependencies: {}'.format(info.path, imp)) + self.manager.trace('{}={} dependencies: {}'.format(info.id, info.path, imp)) # Record the dependencies. Note that the dependencies list also # contains any superpackages and we must preserve them (e.g. os for @@ -997,21 +1047,6 @@ def process(self) -> None: self.switch_state(PartiallySemanticallyAnalyzedFile(self.info(), self.tree)) - def num_incomplete_deps(self) -> int: - """Return the number of dependencies that are incomplete. - - Here complete means that their state is *later* than this module. - Cyclic dependencies are omitted to break cycles forcibly (and somewhat - arbitrarily). 
- """ - incomplete = 0 - for module in self.dependencies: - state = self.manager.module_state(module) - if (not earlier_state(self.state(), state) and - not self.manager.is_dep(module, self.id)): - incomplete += 1 - return incomplete - def state(self) -> int: return PARSED_STATE @@ -1248,14 +1283,15 @@ def get_cache_names(id: str, path: str) -> Tuple[str, str]: return (prefix + '.meta.json', prefix + '.data.json') -def find_cache_meta(id: str, path: str, lib_path: Tuple[str, ...]) -> Optional[CacheMeta]: +def find_cache_meta(id: str, path: str, manager: BuildManager) -> Optional[CacheMeta]: meta_json, data_json = get_cache_names(id, path) - print(' Finding', id, data_json) + manager.log('Finding {} {}'.format(id, data_json)) if not os.path.exists(meta_json): return None with open(meta_json, 'r') as f: - meta = json.load(f) # TODO: Errors - print(' Meta', id, meta) + meta_str = f.read() + manager.log('Meta {} {}'.format(id, meta_str.rstrip())) + meta = json.loads(meta_str) # TODO: Errors if not isinstance(meta, dict): return None path = os.path.abspath(path) @@ -1267,7 +1303,7 @@ def find_cache_meta(id: str, path: str, lib_path: Tuple[str, ...]) -> Optional[C meta.get('dependencies'), meta.get('data_mtime'), data_json, - ) + ) if (m.id != id or m.path != path or m.mtime is None or m.size is None or m.dependencies is None or m.data_mtime is None): @@ -1275,18 +1311,19 @@ def find_cache_meta(id: str, path: str, lib_path: Tuple[str, ...]) -> Optional[C # TODO: Share stat() outcome with find_module() st = os.stat(path) # TODO: Errors if st.st_mtime != m.mtime or st.st_size != m.size: + manager.log('Metadata abandoned because of modified file') return None # It's a match on (id, path, mtime, size). # Check data_json; assume if its mtime matches it's good. # TODO: stat() errors if os.path.getmtime(data_json) != m.data_mtime: return None - print(' Found', id, meta_json, m) + manager.log('Found {} {} {}'.format(id, meta_json, m)) return m -def rand_suffix(): - return '.' + binascii.hexlify(os.urandom(8)).decode('ascii') +def random_string(): + return binascii.hexlify(os.urandom(8)).decode('ascii') def dump_to_json(file: TypeCheckedFile, manager: BuildManager) -> None: @@ -1299,19 +1336,20 @@ def dump_to_json(file: TypeCheckedFile, manager: BuildManager) -> None: if path == '': return path = os.path.abspath(path) - print('Dumping', id, path) + manager.log('Dumping {} {}'.format(id, path)) st = os.stat(path) # TODO: Errors mtime = st.st_mtime size = st.st_size meta_json, data_json = get_cache_names(id, path) - print(' Writing', id, meta_json, data_json) + manager.log('Writing {} {} {}'.format(id, meta_json, data_json)) data = file.tree.serialize() parent = os.path.dirname(data_json) if not os.path.isdir(parent): os.makedirs(parent) assert os.path.dirname(meta_json) == parent - data_json_tmp = data_json + rand_suffix() - meta_json_tmp = meta_json + rand_suffix() + nonce = '.' 
+ random_string() + data_json_tmp = data_json + nonce + meta_json_tmp = meta_json + nonce with open(data_json_tmp, 'w') as f: json.dump(data, f, indent=2, sort_keys=True) f.write('\n') @@ -1326,7 +1364,7 @@ def dump_to_json(file: TypeCheckedFile, manager: BuildManager) -> None: if not cast(TypeCheckedFile, manager.lookup_state(d)).tree.is_stub], } with open(meta_json_tmp, 'w') as f: - json.dump(meta, f, indent=2, sort_keys=True) + json.dump(meta, f, sort_keys=True) f.write('\n') os.rename(data_json_tmp, data_json) os.rename(meta_json_tmp, meta_json) From 0f8dcdc33123fa27690d4de733e24b2592c50738 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Mon, 15 Feb 2016 20:41:20 -0800 Subject: [PATCH 006/117] Serialize more types. We can now serialize (but not yet deserialize!) every type occurring in mypy. --- mypy/build.py | 4 ++ mypy/nodes.py | 95 +++++++++++++++++++++++++++++++++++------------ mypy/types.py | 100 ++++++++++++++++++++++++++++++++++++++++++++------ 3 files changed, 165 insertions(+), 34 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index a3dd0e7553cb..1ccd9035f5d4 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -901,6 +901,10 @@ def __init__(self, info: StateInfo, meta: CacheMeta, data: Any) -> None: def process(self) -> None: tree = MypyFile.deserialize(self.data) + + # Store the parsed module in the shared module symbol table. + self.manager.semantic_analyzer.modules[self.id] = tree + file = TypeCheckedFile(self.info(), tree, self.meta) self.switch_state(file) diff --git a/mypy/nodes.py b/mypy/nodes.py index 5ee8b4477e67..1faff0e30102 100644 --- a/mypy/nodes.py +++ b/mypy/nodes.py @@ -126,19 +126,18 @@ def name(self) -> str: pass def fullname(self) -> str: pass # @abstractmethod # TODO - def serialize(self) -> JsonDict: + def serialize(self) -> Any: raise NotImplementedError('Cannot serialize {} instance'.format(self.__class__.__name__)) @classmethod def deserialize(cls, data: JsonDict) -> 'SymbolNode': classname = data['.class'] - if classname == 'Var': - return Var.deserialize(data) - if classname == 'TypeInfo': - return TypeInfo.deserialize(data) - if classname == 'FuncDef': - return FuncDef.deserialize(data) - raise RuntimeError('unexpected .class {}'.format(classname)) + glo = globals() + if classname in glo: + cl = glo[classname] + if 'deserialize' in cl.__dict__: + return cl.deserialize(data) + raise NotImplementedError('unexpected .class {}'.format(classname)) class MypyFile(SymbolNode): @@ -979,9 +978,6 @@ def __init__(self, name: str) -> None: self.name = name self.literal_hash = ('Var', name,) - def type_node(self): - return cast(TypeInfo, self.node) - def accept(self, visitor: NodeVisitor[T]) -> T: return visitor.visit_name_expr(self) @@ -1483,6 +1479,22 @@ def fullname(self) -> str: def accept(self, visitor: NodeVisitor[T]) -> T: return visitor.visit_type_var_expr(self) + def serialize(self) -> JsonDict: + return {'.class': 'TypeVarExpr', + 'name': self._name, + 'fullname': self._fullname, + 'values': [t.serialize() for t in self.values], + 'variance': self.variance, + } + + @classmethod + def deserialize(cls, data: JsonDict) -> 'TypeVarExpr': + assert data['.class'] == 'TypeVarExpr' + return TypeVarExpr(data['name'], + data['fullname'], + [mypy.types.Type.deserialize(v) for v in data['values']], + data['variance']) + class TypeAliasExpr(Node): """Type alias expression (rvalue).""" @@ -1733,21 +1745,58 @@ def __str__(self) -> str: ('Names', sorted(self.names.keys()))], 'TypeInfo') - def serialize(self) -> JsonDict: - # TODO (esp. 
names) + def serialize(self) -> Union[str, JsonDict]: + fn = self.fullname() + # TODO: When to return a name, when an object? + if fn: + return fn res = {'.class': 'TypeInfo', 'name': self.name(), 'fullname': self.fullname(), + 'mro': [t.serialize() for t in self.mro], + 'subtypes': [t.serialize() for t in self.subtypes], + 'names': self.names.serialize(), + 'is_abstract': self.is_abstract, + 'abstract_attributes': self.abstract_attributes, + 'is_enum': self.is_enum, + 'fallback_to_any': self.fallback_to_any, + 'type_vars': self.type_vars, + 'bases': [b.serialize() for b in self.bases], + '_promote': None if self._promote is None else self._promote.serialize(), + 'tuple_type': None if self.tuple_type is None else self.tuple_type.serialize(), + 'is_named_tuple': self.is_named_tuple, } return res @classmethod - def deserialize(cls, data: JsonDict) -> 'TypeInfo': - assert data['.class'] == 'TypeInfo' - names = SymbolTable() # TODO - cdef = ClassDef(data['name'], Block([])) - cdef.fullname = data['fullname'] - return TypeInfo(names, cdef) + def deserialize(cls, data: Union[str, JsonDict]) -> 'TypeInfo': + if isinstance(data, str): + fullname = data + name = fullname.rsplit('.', 1)[-1] + names = SymbolTable() + else: + fullname = data['fullname'] + name = data['name'] + names = SymbolTable.deserialize(data['names']) + cdef = ClassDef(name, Block([])) + cdef.fullname = fullname + ti = TypeInfo(names, cdef) + ti._fullname = fullname + if isinstance(data, str): + ti.mro = [] + else: + ti.mro = [TypeInfo.deserialize(t) for t in data['mro']] + ti.subtypes = {TypeInfo.deserialize(t) for t in data['subtypes']} + ti.is_abstract = data['is_abstract'] + ti.abstract_attributes = data['abstract_attributes'] + ti.is_enum = data['is_enum'] + ti.fallback_to_any = data['fallback_to_any'] + ti.type_vars = data['type_vars'] + ti.bases = [mypy.types.Instance.deserialize(b) for b in data['bases']] + ti._promote = None if data['_promote'] is None else mypy.types.Type.deserialize(data['_promote']) + ti.tuple_type = None if data['tuple_type'] is None else mypy.types.TupleType.deserialize(data['tuple_type']) + ti.is_named_tuple = data['is_named_tuple'] + return ti class SymbolTableNode: @@ -1823,11 +1872,11 @@ def serialize(self) -> JsonDict: else: typ = self.type if typ is not None: - # TODO: Shorten simple type references (e.g. builtins.str) - res['type'] = typ.serialize() - else: if self.node is not None: - res['node'] = self.node.serialize() + res['node'] = Var(self.node.name(), self.type).serialize() + else: + res['type'] = typ.serialize() + # TODO: else??? 
return res @classmethod diff --git a/mypy/types.py b/mypy/types.py index 7baae9cb2757..63b66e7991a9 100644 --- a/mypy/types.py +++ b/mypy/types.py @@ -35,13 +35,12 @@ def serialize(self) -> JsonDict: @classmethod def deserialize(cls, data: JsonDict) -> 'Type': classname = data['.class'] - if classname == 'AnyType': - return AnyType.deserialize(data) - if classname == 'Instance': - return Instance.deserialize(data) - if classname == 'CallableType': - return CallableType.deserialize(data) - raise RuntimeError('unexpected .class {}'.format(classname)) + glo = globals() + if classname in glo: + cl = glo[classname] + if 'deserialize' in cl.__dict__: + return cl.deserialize(data) + raise NotImplementedError('unexpected .class {}'.format(classname)) class TypeVarDef(mypy.nodes.Context): @@ -149,6 +148,14 @@ def accept(self, visitor: 'TypeVisitor[T]') -> T: def with_source(self, source: str) -> 'Void': return Void(source, self.line) + def serialize(self) -> JsonDict: + return {'.class': 'Void'} + + @classmethod + def deserialize(cls, data: JsonDict) -> 'Void': + assert data['.class'] == 'Void' + return Void() + class NoneTyp(Type): """The type of 'None'. @@ -169,6 +176,14 @@ def __init__(self, line: int = -1) -> None: def accept(self, visitor: 'TypeVisitor[T]') -> T: return visitor.visit_none_type(self) + def serialize(self) -> JsonDict: + return {'.class': 'NoneTyp'} + + @classmethod + def deserialize(self, data: JsonDict) -> 'NoneTyp': + assert data['.class'] == 'NoneTyp' + return NoneTyp() + class ErasedType(Type): """Placeholder for an erased type. @@ -196,6 +211,14 @@ def __init__(self, source: str = None, line: int = -1) -> None: def accept(self, visitor: 'TypeVisitor[T]') -> T: return visitor.visit_deleted_type(self) + def serialize(self) -> JsonDict: + return {'.class': 'DeletedType'} + + @classmethod + def deserialize(self, data: JsonDict) -> 'DeletedType': + assert data['.class'] == 'DeletedType' + return DeletedType() + class Instance(Type): """An instance type of form C[T1, ..., Tn]. 
@@ -266,6 +289,24 @@ def __init__(self, name: str, id: int, values: List[Type], upper_bound: Type, def accept(self, visitor: 'TypeVisitor[T]') -> T: return visitor.visit_type_var(self) + def serialize(self) -> JsonDict: + return {'.class': 'TypeVarType', + 'name': self.name, + 'id': self.id, + 'values': [v.serialize() for v in self.values], + 'upper_bound': self.upper_bound.serialize(), + 'variance': self.variance, + } + + @classmethod + def deserialize(cls, data: JsonDict) -> 'TypeVarType': + assert data['.class'] == 'TypeVarType' + return TypeVarType(data['name'], + data['id'], + [Type.deserialize(v) for v in data['values']], + Type.deserialize(data['upper_bound']), + data['variance']) + class FunctionLike(Type): """Abstract base class for function types.""" @@ -422,25 +463,29 @@ def type_var_ids(self) -> List[int]: def serialize(self) -> JsonDict: return {'.class': 'CallableType', - 'arg_types': [t.serialize() for t in self.arg_types], + 'arg_types': [(None if t is None else t.serialize()) + for t in self.arg_types], 'arg_kinds': self.arg_kinds, 'arg_names': self.arg_names, 'ret_type': self.ret_type.serialize(), 'fallback': self.fallback.serialize(), 'name': self.name, - # TODO: definition, variables, bound_vars, is_ellipsis_args + # TODO: definition, variables, bound_vars + 'is_ellipsis_args': self.is_ellipsis_args, } @classmethod def deserialize(cls, data: JsonDict) -> 'CallableType': assert data['.class'] == 'CallableType' - return CallableType([Type.deserialize(t) for t in data['arg_types']], + return CallableType([(None if t is None else Type.deserialize(t)) + for t in data['arg_types']], data['arg_kinds'], data['arg_names'], Type.deserialize(data['ret_type']), Instance.deserialize(data['fallback']), name=data.get('name'), - # TODO: definition, variables, bound_vars, is_ellipsis_args + # TODO: definition, variables, bound_vars + is_ellipsis_args=data['is_ellipsis_args'], ) @@ -485,6 +530,16 @@ def with_name(self, name: str) -> 'Overloaded': def accept(self, visitor: 'TypeVisitor[T]') -> T: return visitor.visit_overloaded(self) + def serialize(self) -> JsonDict: + return {'.class': 'Overloaded', + 'items': [t.serialize() for t in self.items()], + } + + @classmethod + def deserialize(self, data: JsonDict) -> 'Overloaded': + assert data['.class'] == 'Overloaded' + return Overloaded([CallableType.deserialize(t) for t in data['items']]) + class TupleType(Type): """The tuple type Tuple[T1, ..., Tn] (at least one type argument). @@ -514,6 +569,19 @@ def length(self) -> int: def accept(self, visitor: 'TypeVisitor[T]') -> T: return visitor.visit_tuple_type(self) + def serialize(self) -> JsonDict: + return {'.class': 'TupleType', + 'items': [t.serialize() for t in self.items], + 'fallback': self.fallback.serialize(), + # TODO: implicit + } + + @classmethod + def deserialize(cls, data: JsonDict) -> 'TupleType': + assert data['.class'] == 'TupleType' + return TupleType([Type.deserialize(t) for t in data['items']], + Instance.deserialize(data['fallback'])) + class StarType(Type): """The star type *type_parameter. 
@@ -589,6 +657,16 @@ def has_readable_member(self, name: str) -> bool: (isinstance(x, Instance) and cast(Instance, x).type.has_readable_member(name)) for x in self.items) + def serialize(self) -> JsonDict: + return {'.class': 'UnionType', + 'items': [t.serialize() for t in self.items], + } + + @classmethod + def deserialize(cls, data: JsonDict) -> 'UnionType': + assert data['.class'] == 'UnionType' + return UnionType([Type.deserialize(t) for t in data['items']]) + class PartialType(Type): """Type such as List[?] where type arguments are unknown, or partial None type. From 44b28284bc06e4f5191ace119f7b3c6d2ba575ab Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 17 Feb 2016 17:05:27 -0800 Subject: [PATCH 007/117] Start fixing up nodes after deserialization(). --- mypy/build.py | 44 +++++++++++++++++++++++++++++++++++++++++--- mypy/fixup.py | 40 ++++++++++++++++++++++++++++++++++++++++ mypy/nodes.py | 11 ++++++++--- 3 files changed, 89 insertions(+), 6 deletions(-) create mode 100644 mypy/fixup.py diff --git a/mypy/build.py b/mypy/build.py index 1ccd9035f5d4..0ec58529ec7c 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -28,6 +28,7 @@ from mypy.semanal import SemanticAnalyzer, FirstPass, ThirdPass from mypy.checker import TypeChecker from mypy.errors import Errors, CompileError +from mypy import fixup from mypy import parse from mypy import stats from mypy.report import Reports @@ -845,19 +846,46 @@ def __init__(self, info: StateInfo, meta: CacheMeta) -> None: self.meta = meta def load_dependencies(self): - for dep_id in self.meta.dependencies: + deps = self.meta.dependencies[:] + if self.id != 'builtins': + deps.append('builtins') # Even cached modules need this. + for dep_id in deps: if self.import_module(dep_id): self.dependencies.append(dep_id) + # TODO: else fail(...) + + def is_ready(self): + # Special case for builtins. + if self.id != 'builtins': + state = self.manager.module_state('builtins') + if state not in (UNSEEN_STATE, TYPE_CHECKED_STATE): + return False + return super().is_ready() def process(self) -> None: + """Transition to either UnprocessedFile or CacheLoadedFile. + + We've been waiting for results on the dependencies. If all + dependencies have now transitioned to eith CacheLoadedFile + (meaning their own dependencies were found good, except for + cycles) or from there to TypeCheckedFile (note that we check + that meta is not None) then we can in turn (try to) transition + to CacheLoadedFile. This could still fail due to a race + condition (if the data file's mtime). + + If any dependency was not loaded from cache or loading the + data failed, we fall back to reading the source, by switching + to an UnprocessedFile. + """ ok = True for dep_id in self.dependencies: state_obj = self.manager.lookup_state(dep_id) if (isinstance(state_obj, CacheLoadedFile) or isinstance(state_obj, ProbablyCachedFile)): continue - if isinstance(state_obj, TypeCheckedFile) and state_obj.meta is not None: - continue + if isinstance(state_obj, TypeCheckedFile): + if state_obj.meta is not None or dep_id == 'builtins': + continue self.manager.log('Abandoning cached data for {} ' 'because {} changed ({})'.format(self.id, state_obj.id, state_obj.__class__.__name__)) @@ -897,14 +925,24 @@ def __init__(self, info: StateInfo, meta: CacheMeta, data: Any) -> None: super().__init__(info) self.meta = meta self.dependencies.extend(meta.dependencies) + if self.id != 'builtins': + self.dependencies.append('builtins') # Even cached modules need this. 
self.data = data def process(self) -> None: + """Transition directly to TypeCheckedFile. + + This deserializes the tree and patches up cross-references. + """ tree = MypyFile.deserialize(self.data) # Store the parsed module in the shared module symbol table. self.manager.semantic_analyzer.modules[self.id] = tree + # Fix up various things in the symbol tables. + print('Fixing up', self.id) + fixup.fixup_symbol_table(tree.names, self.semantic_analyzer().modules) + file = TypeCheckedFile(self.info(), tree, self.meta) self.switch_state(file) diff --git a/mypy/fixup.py b/mypy/fixup.py new file mode 100644 index 000000000000..8512cbc2e3f0 --- /dev/null +++ b/mypy/fixup.py @@ -0,0 +1,40 @@ +"""Fix up various things after deserialization().""" + +from typing import Dict, cast + +from mypy.nodes import MypyFile, SymbolTable, SymbolTableNode, TypeInfo, Var, LDEF, MDEF, GDEF, MODULE_REF +from mypy.types import Instance, CallableType + + +def lookup_qualified(name: str, modules: Dict[str, MypyFile]) -> SymbolTableNode: + parts = name.split('.') + node = modules.get(parts[0]) + if node is None: + return None + for part in parts[1:-1]: + if part not in node.names: + return None + node = cast(MypyFile, node.names[part].node) + assert isinstance(node, MypyFile) + return node.names.get(parts[-1]) + + +def fixup_symbol_table(symtab: SymbolTable, modules: Dict[str, MypyFile]) -> None: + for key, value in symtab.items(): + if value.kind in (LDEF, MDEF, GDEF): + if isinstance(value.node, Var): + fixup_var(value.node, modules) + + +def fixup_var(node: Var, modules: Dict[str, MypyFile]) -> None: + if isinstance(node.type, Instance): + if isinstance(node.type.type, TypeInfo): + if node.type.type.is_dummy: + stnode = lookup_qualified(node.type.type.fullname(), modules) + assert stnode is not None and stnode.kind == GDEF + if isinstance(stnode.node, TypeInfo): + node.type.type = stnode.node + print('Fixed up type for', node, 'from', stnode.node.fullname()) + else: + assert False, stnode.node + return diff --git a/mypy/nodes.py b/mypy/nodes.py index 1faff0e30102..bd637969909e 100644 --- a/mypy/nodes.py +++ b/mypy/nodes.py @@ -1608,6 +1608,9 @@ class is generic then it will be a type constructor of higher kind. # Is this a named tuple type? is_named_tuple = False + # Is this a dummy from deserialization? 
+ is_dummy = False + def __init__(self, names: 'SymbolTable', defn: ClassDef) -> None: """Initialize a TypeInfo.""" self.names = names @@ -1783,7 +1786,7 @@ def deserialize(cls, data: Union[str, JsonDict]) -> 'TypeInfo': ti = TypeInfo(names, cdef) ti._fullname = fullname if isinstance(data, str): - ti.mro = [] + ti.is_dummy = True else: ti.mro = [TypeInfo.deserialize(t) for t in data['mro']] ti.subtypes = {TypeInfo.deserialize(t) for t in data['subtypes']} @@ -1793,8 +1796,10 @@ def deserialize(cls, data: Union[str, JsonDict]) -> 'TypeInfo': ti.fallback_to_any = data['fallback_to_any'] ti.type_vars = data['type_vars'] ti.bases = [mypy.types.Instance.deserialize(b) for b in data['bases']] - ti._promote = None if data['_promote'] is None else mypy.types.Type.deserialize(data['_promote']) - ti.tuple_type = None if data['tuple_type'] is None else mypy.types.TupleType.deserialize(data['tuple_type']) + ti._promote = (None if data['_promote'] is None + else mypy.types.Type.deserialize(data['_promote'])) + ti.tuple_type = (None if data['tuple_type'] is None + else mypy.types.TupleType.deserialize(data['tuple_type'])) ti.is_named_tuple = data['is_named_tuple'] return ti From f2320562c4c871f7b041421353a4273d9706745a Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Thu, 18 Feb 2016 11:24:23 -0800 Subject: [PATCH 008/117] Another big step towards correct [de]serialization. Added fairly thorough check on what we wrote. --- mypy/build.py | 49 +++++++++++++++++ mypy/fixup.py | 7 +-- mypy/nodes.py | 142 ++++++++++++++++++++++++++++++++++---------------- mypy/types.py | 55 +++++++++++++++---- 4 files changed, 195 insertions(+), 58 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index 0ec58529ec7c..4eff32d7bd43 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -1410,3 +1410,52 @@ def dump_to_json(file: TypeCheckedFile, manager: BuildManager) -> None: f.write('\n') os.rename(data_json_tmp, data_json) os.rename(meta_json_tmp, meta_json) + + # Now, as a test, read it back. 
+ print() + print('Reading what we wrote for', file.id, 'from', data_json) + with open(data_json, 'r') as f: + new_data = json.load(f) + assert new_data == data + new_tree = MypyFile.deserialize(new_data) + new_names = new_tree.names + new_keys = sorted(new_names) + + print('Fixing up', file.id) + fixup.fixup_symbol_table(new_names, file.semantic_analyzer().modules) + + print('Comparing keys', file.id) + old_tree = file.tree + old_names = old_tree.names + old_keys = sorted(old_names) + if new_keys != old_keys: + for key in new_keys: + if key not in old_keys: + print(' New key', key, 'not found in old tree') + for key in old_keys: + if key not in new_keys: + v = old_names[key] + if key != '__builtins__' and v.module_public: + print(' Old key', key, 'not found in new tree') + + print('Comparing values', file.id) + modules = file.semantic_analyzer().modules + for key in old_keys: + if key not in new_keys: + continue + oldv = old_names[key] + newv = new_names[key] + if newv.mod_id != oldv.mod_id: + newv.mod_id = file.id # XXX Hack + if newv.kind == MODULE_REF and newv.node is None: + fn = oldv.node.fullname() + if fn in modules: + newv.node = modules[fn] + else: + print('*** Cannot fix up reference to module', fn, 'for', key) + if str(oldv) != str(newv): + print(' ', key, 'old', oldv) + print(' ', ' ' * len(key), 'new', newv) + import pdb # type: ignore + pdb.set_trace() + print() diff --git a/mypy/fixup.py b/mypy/fixup.py index 8512cbc2e3f0..a84bf6f95fa4 100644 --- a/mypy/fixup.py +++ b/mypy/fixup.py @@ -2,7 +2,8 @@ from typing import Dict, cast -from mypy.nodes import MypyFile, SymbolTable, SymbolTableNode, TypeInfo, Var, LDEF, MDEF, GDEF, MODULE_REF +from mypy.nodes import (MypyFile, SymbolTable, SymbolTableNode, TypeInfo, Var, + LDEF, MDEF, GDEF, MODULE_REF) from mypy.types import Instance, CallableType @@ -15,7 +16,7 @@ def lookup_qualified(name: str, modules: Dict[str, MypyFile]) -> SymbolTableNode if part not in node.names: return None node = cast(MypyFile, node.names[part].node) - assert isinstance(node, MypyFile) + assert isinstance(node, MypyFile), node return node.names.get(parts[-1]) @@ -31,7 +32,7 @@ def fixup_var(node: Var, modules: Dict[str, MypyFile]) -> None: if isinstance(node.type.type, TypeInfo): if node.type.type.is_dummy: stnode = lookup_qualified(node.type.type.fullname(), modules) - assert stnode is not None and stnode.kind == GDEF + assert stnode is not None and stnode.kind == GDEF, stnode if isinstance(stnode.node, TypeInfo): node.type.type = stnode.node print('Fixed up type for', node, 'from', stnode.node.fullname()) diff --git a/mypy/nodes.py b/mypy/nodes.py index bd637969909e..77ef11958cc3 100644 --- a/mypy/nodes.py +++ b/mypy/nodes.py @@ -194,14 +194,20 @@ def is_package_init_file(self) -> bool: def serialize(self) -> JsonDict: return {'.class': 'MypyFile', + '_name': self._name, + '_fullname': self._fullname, 'names': self.names.serialize(), + 'is_stub': self.is_stub, } @classmethod def deserialize(cls, data: JsonDict) -> 'MypyFile': assert data['.class'] == 'MypyFile', data tree = MypyFile([], []) + tree._name = data['_name'] + tree._fullname = data['_fullname'] tree.names = SymbolTable.deserialize(data['names']) + tree.is_stub = data['is_stub'] return tree @@ -300,6 +306,16 @@ def name(self) -> str: def accept(self, visitor: NodeVisitor[T]) -> T: return visitor.visit_overloaded_func_def(self) + def serialize(self) -> JsonDict: + return {'.class': 'OverloadedFuncDef', + 'items': [i.serialize() for i in self.items], + } + + @classmethod + def deserialize(cls, 
data: JsonDict) -> 'OverloadedFuncDef': + assert data['.class'] == 'OverloadedFuncDef' + return OverloadedFuncDef([Decorator.deserialize(d) for d in data['items']]) + class Argument(Node): """A single argument in a FuncItem.""" @@ -342,12 +358,12 @@ def set_line(self, target: Union[Token, Node, int]) -> Node: self.initialization_statement.lvalues[0].set_line(self.line) def serialize(self) -> JsonDict: - res = {'.class': 'Argument'} # type: JsonDict - res['variable'] = self.variable.serialize() + data = {'.class': 'Argument'} # type: JsonDict + data['variable'] = self.variable.serialize() # TODO: type_annotation # TODO: initializer - res['kind'] = self.kind - return res + data['kind'] = self.kind + return data @classmethod def deserialize(cls, data: JsonDict) -> 'Argument': @@ -434,6 +450,7 @@ def serialize(self) -> JsonDict: return {'.class': 'FuncDef', 'name': self._name, 'arguments': [a.serialize() for a in self.arguments], + 'type': None if self.type is None else self.type.serialize(), # TODO: type } @@ -443,7 +460,10 @@ def deserialize(cls, data: JsonDict) -> 'FuncDef': body = Block([]) return FuncDef(data['name'], [Argument.deserialize(a) for a in data['arguments']], - body) + body, + (None if data['type'] is None + else mypy.types.FunctionLike.deserialize(data['type'])), + ) class Decorator(SymbolNode): @@ -473,6 +493,23 @@ def fullname(self) -> str: def accept(self, visitor: NodeVisitor[T]) -> T: return visitor.visit_decorator(self) + def serialize(self) -> JsonDict: + return {'.class': 'Decorator', + 'func': self.func.serialize(), + # TODO: 'decorators' + 'var': self.var.serialize(), + 'is_overload': self.is_overload, + } + + @classmethod + def deserialize(cls, data: JsonDict) -> 'Decorator': + assert data['.class'] == 'Decorator' + dec = Decorator(FuncDef.deserialize(data['func']), + [], # TODO: decorators + Var.deserialize(data['var'])) + dec.is_overload = data['is_overload'] + return dec + class Var(SymbolNode): """A variable. @@ -511,14 +548,14 @@ def accept(self, visitor: NodeVisitor[T]) -> T: return visitor.visit_var(self) def serialize(self) -> JsonDict: - res = {'.class': 'Var', - 'name': self._name, - } # type: JsonDict + data = {'.class': 'Var', + 'name': self._name, + } # type: JsonDict if self._fullname is not None: - res['fullname'] = self._fullname + data['fullname'] = self._fullname if self.type is not None: - res['type'] = self.type.serialize() - return res + data['type'] = self.type.serialize() + return data @classmethod def deserialize(cls, data: JsonDict) -> 'Var': @@ -1748,28 +1785,28 @@ def __str__(self) -> str: ('Names', sorted(self.names.keys()))], 'TypeInfo') - def serialize(self) -> Union[str, JsonDict]: + def serialize(self, full=False) -> Union[str, JsonDict]: fn = self.fullname() # TODO: When to return a name, when an object? 
- if fn: + if fn and not full: return fn - res = {'.class': 'TypeInfo', - 'name': self.name(), - 'fullname': self.fullname(), - 'mro': [t.serialize() for t in self.mro], - 'subtypes': [t.serialize() for t in self.subtypes], - 'names': self.names.serialize(), - 'is_abstract': self.is_abstract, - 'abstract_attributes': self.abstract_attributes, - 'is_enum': self.is_enum, - 'fallback_to_any': self.fallback_to_any, - 'type_vars': self.type_vars, - 'bases': [b.serialize() for b in self.bases], - '_promote': None if self._promote is None else self._promote.serialize(), - 'tuple_type': None if self.tuple_type is None else self.tuple_type.serialize(), - 'is_named_tuple': self.is_named_tuple, - } - return res + data = {'.class': 'TypeInfo', + 'name': self.name(), + 'fullname': self.fullname(), + 'mro': [t.serialize() for t in self.mro], + 'subtypes': [t.serialize() for t in self.subtypes], + 'names': self.names.serialize(), + 'is_abstract': self.is_abstract, + 'abstract_attributes': self.abstract_attributes, + 'is_enum': self.is_enum, + 'fallback_to_any': self.fallback_to_any, + 'type_vars': self.type_vars, + 'bases': [b.serialize() for b in self.bases], + '_promote': None if self._promote is None else self._promote.serialize(), + 'tuple_type': None if self.tuple_type is None else self.tuple_type.serialize(), + 'is_named_tuple': self.is_named_tuple, + } + return data @classmethod def deserialize(cls, data: Union[str, JsonDict]) -> 'TypeInfo': @@ -1869,20 +1906,35 @@ def __str__(self) -> str: return s def serialize(self) -> JsonDict: - res = {'.class': 'SymbolTableNode', - 'kind': node_kinds[self.kind], - } # type: JsonDict + data = {'.class': 'SymbolTableNode', + 'kind': node_kinds[self.kind], + } # type: JsonDict if self.kind == MODULE_REF: - res['module_ref'] = self.node.fullname() + data['module_ref'] = self.node.fullname() + elif self.kind == TYPE_ALIAS: + assert self.type_override is not None + assert self.node is not None + data['type'] = self.type_override.serialize() + data['node'] = self.node.serialize() else: - typ = self.type - if typ is not None: - if self.node is not None: - res['node'] = Var(self.node.name(), self.type).serialize() - else: - res['type'] = typ.serialize() - # TODO: else??? - return res + if isinstance(self.node, TypeInfo): + data['node'] = self.node.serialize(True) + typ = self.type + if typ is not None: + print('XXX Huh?', typ, 'for', self.node._fullname) + elif isinstance(self.node, FuncDef): + data['node'] = self.node.serialize() + typ = self.type + if typ is not None: + data['type'] = typ.serialize() + elif isinstance(self.node, (Var, TypeVarExpr, OverloadedFuncDef, Decorator)): + data['node'] = self.node.serialize() + # else? 
XXX + if len(data) == 2 and self.kind != UNBOUND_IMPORTED: + print('An unsupported case!') + import pdb # type: ignore + pdb.set_trace() + return data @classmethod def deserialize(cls, data: JsonDict) -> 'SymbolTableNode': @@ -1917,12 +1969,12 @@ def __str__(self) -> str: return '\n'.join(a) def serialize(self) -> JsonDict: - res = {'.class': 'SymbolTable'} # type: JsonDict + data = {'.class': 'SymbolTable'} # type: JsonDict for key, value in self.items(): if key == '__builtins__' or not value.module_public: continue - res[key] = value.serialize() - return res + data[key] = value.serialize() + return data @classmethod def deserialize(cls, data: JsonDict) -> 'SymbolTable': diff --git a/mypy/types.py b/mypy/types.py index 63b66e7991a9..a1445c7af83e 100644 --- a/mypy/types.py +++ b/mypy/types.py @@ -71,6 +71,25 @@ def __repr__(self) -> str: else: return self.name + def serialize(self) -> JsonDict: + return {'.class': 'TypeVarDef', + 'name': self.name, + 'id': self.id, + 'values': [v.serialize() for v in self.values], + 'upper_bound': self.upper_bound.serialize(), + 'variance': self.variance, + } + + @classmethod + def deserialize(cls, data: JsonDict) -> 'TypeVarDef': + assert data['.class'] == 'TypeVarDef' + return TypeVarDef(data['name'], + data['id'], + [Type.deserialize(v) for v in data['values']], + Type.deserialize(data['upper_bound']), + data['variance'], + ) + class UnboundType(Type): """Instance type that has not been bound during semantic analysis.""" @@ -212,12 +231,13 @@ def accept(self, visitor: 'TypeVisitor[T]') -> T: return visitor.visit_deleted_type(self) def serialize(self) -> JsonDict: - return {'.class': 'DeletedType'} + return {'.class': 'DeletedType', + 'source': self.source} @classmethod def deserialize(self, data: JsonDict) -> 'DeletedType': assert data['.class'] == 'DeletedType' - return DeletedType() + return DeletedType(data.get('source')) class Instance(Type): @@ -241,13 +261,15 @@ def accept(self, visitor: 'TypeVisitor[T]') -> T: return visitor.visit_instance(self) def serialize(self) -> JsonDict: - res = {'.class': 'Instance', - } # type: JsonDict + data = {'.class': 'Instance', + } # type: JsonDict if self.type is not None: - res['type'] = self.type.serialize() + data['type'] = self.type.serialize() if self.args: - res['args'] = [arg.serialize() for arg in self.args] - return res + data['args'] = [arg.serialize() for arg in self.args] + if self.erased: + data['erased'] = True + return data @classmethod def deserialize(cls, data: JsonDict) -> 'Instance': @@ -260,7 +282,7 @@ def deserialize(cls, data: JsonDict) -> 'Instance': args_list = data['args'] assert isinstance(args_list, list) args = [Type.deserialize(arg) for arg in args_list] - return Instance(typ, args) + return Instance(typ, args, erased=data.get('erased', False)) class TypeVarType(Type): @@ -326,6 +348,10 @@ def with_name(self, name: str) -> 'FunctionLike': pass # Corresponding instance type (e.g. 
builtins.type) fallback = None # type: Instance + @classmethod + def deserialize(cls, data: JsonDict) -> 'FunctionLike': + return cast(FunctionLike, super().deserialize(data)) + _dummy = object() # type: Any @@ -470,13 +496,17 @@ def serialize(self) -> JsonDict: 'ret_type': self.ret_type.serialize(), 'fallback': self.fallback.serialize(), 'name': self.name, - # TODO: definition, variables, bound_vars + 'variables': (None if self.variables is None + else [v.serialize() for v in self.variables]), + 'bound_vars': (None if self.bound_vars is None + else [[x, y.serialize()] for x, y in self.bound_vars]), 'is_ellipsis_args': self.is_ellipsis_args, } @classmethod def deserialize(cls, data: JsonDict) -> 'CallableType': assert data['.class'] == 'CallableType' + # TODO: Set definition to the containing SymbolNode? return CallableType([(None if t is None else Type.deserialize(t)) for t in data['arg_types']], data['arg_kinds'], @@ -484,7 +514,12 @@ def deserialize(cls, data: JsonDict) -> 'CallableType': Type.deserialize(data['ret_type']), Instance.deserialize(data['fallback']), name=data.get('name'), - # TODO: definition, variables, bound_vars + variables=(None if data.get('variables') is None + else [TypeVarDef.deserialize(v) + for v in data['variables']]), + bound_vars=(None if data.get('bound_vars') is None + else [(x, Type.deserialize(y)) + for x, y in data['bound_vars']]), is_ellipsis_args=data['is_ellipsis_args'], ) From d2491cd9c153af6c8ee2984081f766953980fb75 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Thu, 18 Feb 2016 17:41:55 -0800 Subject: [PATCH 009/117] Progress towards fixing up more cross references. But also more debugging. --- mypy/build.py | 56 ++++++++++++++++++++++++--------------------------- mypy/fixup.py | 29 ++++++++++++++++++++++---- mypy/nodes.py | 12 ++++++++--- 3 files changed, 60 insertions(+), 37 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index 4eff32d7bd43..6af989b2a58d 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -441,8 +441,15 @@ def process(self, initial_states: List['UnprocessedBase']) -> BuildResult: self.trace('done') break + # XXX + self.trace('STATES OF THE WORLD') + for s in self.states: + self.trace(' id=%-15s ready=%-5s deps=%d (%2d) %s' % + (s.id, s.is_ready(), s.num_incomplete_deps(), s.state(), s.dependencies)) + self.trace('') + # Potentially output some debug information. - self.trace('next {}={} ({})'.format(next.id, next.path, next.state())) + self.trace('next {} ({})'.format(next.id, next.state())) # Set the import context for reporting error messages correctly. self.errors.set_import_context(next.import_context) @@ -736,9 +743,8 @@ def switch_state(self, state_object: 'State') -> None: for i in range(len(self.manager.states)): if self.manager.states[i].path == state_object.path: self.manager.states[i] = state_object - self.manager.trace('switch {}={} ({})'.format(state_object.id, - state_object.path, - state_object.state())) + self.manager.trace('switch {} ({})'.format(state_object.id, + state_object.state())) return raise RuntimeError('State for {} not found'.format(state_object.path)) @@ -847,21 +853,13 @@ def __init__(self, info: StateInfo, meta: CacheMeta) -> None: def load_dependencies(self): deps = self.meta.dependencies[:] - if self.id != 'builtins': + if self.id != 'builtins' and 'builtins' not in deps: deps.append('builtins') # Even cached modules need this. for dep_id in deps: if self.import_module(dep_id): self.dependencies.append(dep_id) # TODO: else fail(...) - def is_ready(self): - # Special case for builtins. 
- if self.id != 'builtins': - state = self.manager.module_state('builtins') - if state not in (UNSEEN_STATE, TYPE_CHECKED_STATE): - return False - return super().is_ready() - def process(self) -> None: """Transition to either UnprocessedFile or CacheLoadedFile. @@ -927,23 +925,24 @@ def __init__(self, info: StateInfo, meta: CacheMeta, data: Any) -> None: self.dependencies.extend(meta.dependencies) if self.id != 'builtins': self.dependencies.append('builtins') # Even cached modules need this. - self.data = data + + # Deserialize the tree now. + self.tree = MypyFile.deserialize(data) + + # Store the parsed module in the shared module symbol table. + self.manager.semantic_analyzer.modules[self.id] = self.tree def process(self) -> None: """Transition directly to TypeCheckedFile. - This deserializes the tree and patches up cross-references. + This patches up cross-references. """ - tree = MypyFile.deserialize(self.data) - - # Store the parsed module in the shared module symbol table. - self.manager.semantic_analyzer.modules[self.id] = tree - # Fix up various things in the symbol tables. + print() print('Fixing up', self.id) - fixup.fixup_symbol_table(tree.names, self.semantic_analyzer().modules) + fixup.fixup_symbol_table(self.tree.names, self.semantic_analyzer().modules) - file = TypeCheckedFile(self.info(), tree, self.meta) + file = TypeCheckedFile(self.info(), self.tree, self.meta) self.switch_state(file) def state(self) -> int: @@ -988,8 +987,7 @@ def process(self) -> None: p = '.'.join(c[:-1]) sem_anal = self.manager.semantic_analyzer if p in sem_anal.modules: - sem_anal.modules[p].names[c[-1]] = SymbolTableNode( - MODULE_REF, tree, p) + sem_anal.modules[p].names[c[-1]] = SymbolTableNode(MODULE_REF, tree, p) if self.id != 'builtins': # The builtins module is imported implicitly in every program (it @@ -1076,7 +1074,7 @@ def __init__(self, info: StateInfo, tree: MypyFile, imp.append('builtins') if imp != []: - self.manager.trace('{}={} dependencies: {}'.format(info.id, info.path, imp)) + self.manager.trace('{} dependencies: {}'.format(info.id, imp)) # Record the dependencies. Note that the dependencies list also # contains any superpackages and we must preserve them (e.g. os for @@ -1369,8 +1367,6 @@ def random_string(): def dump_to_json(file: TypeCheckedFile, manager: BuildManager) -> None: - if file.tree.is_stub: - return id = file.id if id == '__main__': return @@ -1401,9 +1397,7 @@ def dump_to_json(file: TypeCheckedFile, manager: BuildManager) -> None: 'mtime': mtime, 'size': size, 'data_mtime': data_mtime, - 'dependencies': [d - for d in file.dependencies - if not cast(TypeCheckedFile, manager.lookup_state(d)).tree.is_stub], + 'dependencies': file.dependencies, } with open(meta_json_tmp, 'w') as f: json.dump(meta, f, sort_keys=True) @@ -1411,6 +1405,8 @@ def dump_to_json(file: TypeCheckedFile, manager: BuildManager) -> None: os.rename(data_json_tmp, data_json) os.rename(meta_json_tmp, meta_json) + return + # Now, as a test, read it back. 
print() print('Reading what we wrote for', file.id, 'from', data_json) diff --git a/mypy/fixup.py b/mypy/fixup.py index a84bf6f95fa4..5d4d4c3027e2 100644 --- a/mypy/fixup.py +++ b/mypy/fixup.py @@ -9,15 +9,26 @@ def lookup_qualified(name: str, modules: Dict[str, MypyFile]) -> SymbolTableNode: parts = name.split('.') + # print(' Looking for module', parts) node = modules.get(parts[0]) if node is None: + print('*** Cannot find module', parts[0]) return None - for part in parts[1:-1]: + for i, part in enumerate(parts[1:-1], 1): + # print(' Looking for submodule', part, 'of package', parts[:i]) if part not in node.names: + print('*** Cannot find submodule', part, 'of package', parts[:i]) + return None + if node.names[part].node is None: + print('*** Weird!!!', part, 'exists in', parts[:i], 'but its node is None') return None node = cast(MypyFile, node.names[part].node) assert isinstance(node, MypyFile), node - return node.names.get(parts[-1]) + # print(' Looking for', parts[-1], 'in module', parts[:-1]) + res = node.names.get(parts[-1]) + if res is None: + print('*** Cannot find', parts[-1], 'in module', parts[:-1]) + return res def fixup_symbol_table(symtab: SymbolTable, modules: Dict[str, MypyFile]) -> None: @@ -25,17 +36,27 @@ def fixup_symbol_table(symtab: SymbolTable, modules: Dict[str, MypyFile]) -> Non if value.kind in (LDEF, MDEF, GDEF): if isinstance(value.node, Var): fixup_var(value.node, modules) + elif value.kind == MODULE_REF: + if value.module_ref not in modules: + print('*** Cannot find module', value.module_ref, 'needed for patch-up') + return + value.node = modules[value.module_ref] + # print('Fixed up module ref to', value.module_ref) def fixup_var(node: Var, modules: Dict[str, MypyFile]) -> None: if isinstance(node.type, Instance): if isinstance(node.type.type, TypeInfo): if node.type.type.is_dummy: - stnode = lookup_qualified(node.type.type.fullname(), modules) + fn = node.type.type.fullname() + stnode = lookup_qualified(fn, modules) + if stnode is None: + print('*** Cannot find', fn, 'needed to fix up', node) + return assert stnode is not None and stnode.kind == GDEF, stnode if isinstance(stnode.node, TypeInfo): node.type.type = stnode.node - print('Fixed up type for', node, 'from', stnode.node.fullname()) + # print('Fixed up type for', node, 'from', stnode.node.fullname()) else: assert False, stnode.node return diff --git a/mypy/nodes.py b/mypy/nodes.py index 77ef11958cc3..683586085c32 100644 --- a/mypy/nodes.py +++ b/mypy/nodes.py @@ -1864,8 +1864,10 @@ class SymbolTableNode: # If False, this name won't be imported via 'from import *'. # This has no effect on names within classes. module_public = True + # For deserialized MODULE_REF nodes, the referenced module name + module_ref = None # type: str - def __init__(self, kind: int, node: SymbolNode, mod_id: str = None, + def __init__(self, kind: int, node: Optional[SymbolNode], mod_id: str = None, typ: 'mypy.types.Type' = None, tvar_id: int = 0, module_public: bool = True) -> None: self.kind = kind @@ -1931,7 +1933,7 @@ def serialize(self) -> JsonDict: data['node'] = self.node.serialize() # else? 
XXX if len(data) == 2 and self.kind != UNBOUND_IMPORTED: - print('An unsupported case!') + print('An unsupported SymbolTableNode!') import pdb # type: ignore pdb.set_trace() return data @@ -1940,7 +1942,11 @@ def serialize(self) -> JsonDict: def deserialize(cls, data: JsonDict) -> 'SymbolTableNode': assert data['.class'] == 'SymbolTableNode' kind = inverse_node_kinds[data['kind']] - # NOTE: MODULE_REF needs to be fixed up in a later pass. + if kind == MODULE_REF: + # This needs to be fixed up in a later pass. + stnode = SymbolTableNode(kind, None) + stnode.module_ref = data['module_ref'] + return stnode typ = None node = None if 'type' in data: From 6dc9ba7d6aa367f384cd8c0814a0bc608d6f669b Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Thu, 18 Feb 2016 20:24:23 -0800 Subject: [PATCH 010/117] Do not serialize mro -- recompute it. --- mypy/nodes.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/mypy/nodes.py b/mypy/nodes.py index 683586085c32..a9b8c399ffcf 100644 --- a/mypy/nodes.py +++ b/mypy/nodes.py @@ -1787,13 +1787,11 @@ def __str__(self) -> str: def serialize(self, full=False) -> Union[str, JsonDict]: fn = self.fullname() - # TODO: When to return a name, when an object? if fn and not full: return fn data = {'.class': 'TypeInfo', 'name': self.name(), 'fullname': self.fullname(), - 'mro': [t.serialize() for t in self.mro], 'subtypes': [t.serialize() for t in self.subtypes], 'names': self.names.serialize(), 'is_abstract': self.is_abstract, @@ -1825,7 +1823,6 @@ def deserialize(cls, data: Union[str, JsonDict]) -> 'TypeInfo': if isinstance(data, str): ti.is_dummy = True else: - ti.mro = [TypeInfo.deserialize(t) for t in data['mro']] ti.subtypes = {TypeInfo.deserialize(t) for t in data['subtypes']} ti.is_abstract = data['is_abstract'] ti.abstract_attributes = data['abstract_attributes'] @@ -1838,6 +1835,7 @@ def deserialize(cls, data: Union[str, JsonDict]) -> 'TypeInfo': ti.tuple_type = (None if data['tuple_type'] is None else mypy.types.TupleType.deserialize(data['tuple_type'])) ti.is_named_tuple = data['is_named_tuple'] + ti.calculate_mro() return ti From f5aabc2b40c2a0d7ae9b4738056b81296aaa2660 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Sun, 21 Feb 2016 20:42:47 -0800 Subject: [PATCH 011/117] Instance.serialize() always writes the TypeInfo's fullname. Different tack for fixup. --- mypy/build.py | 6 ++- mypy/fixup.py | 138 +++++++++++++++++++++++++++++++++++++------------- mypy/nodes.py | 49 +++++++----------- mypy/types.py | 13 +++-- 4 files changed, 134 insertions(+), 72 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index 6af989b2a58d..6667952e7a8c 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -941,6 +941,9 @@ def process(self) -> None: print() print('Fixing up', self.id) fixup.fixup_symbol_table(self.tree.names, self.semantic_analyzer().modules) + # TODO: For import cycles, if not everything was fixed up, + # stay in this state and try again later (or move to one extra + # state, if two passes are always enough). file = TypeCheckedFile(self.info(), self.tree, self.meta) self.switch_state(file) @@ -1309,8 +1312,7 @@ def read_with_python_encoding(path: str, pyversion: Tuple[int, int]) -> str: # Experimental incremental loading -# TODO: Flags -# TODO: files on command line (but not __main__) +# TODO: Flags (e.g. 
py2, implicit-any) MYPY_CACHE = '.mypy_cache' diff --git a/mypy/fixup.py b/mypy/fixup.py index 5d4d4c3027e2..487cf5c86818 100644 --- a/mypy/fixup.py +++ b/mypy/fixup.py @@ -1,10 +1,111 @@ """Fix up various things after deserialization().""" -from typing import Dict, cast +# TODO: Handle import cycles better. Once several modules are all +# loaded, keep fixing them up until they are all fixed 100%. (This +# requires adding logic to build.py.) -from mypy.nodes import (MypyFile, SymbolTable, SymbolTableNode, TypeInfo, Var, +# TODO: Fix up info everywhere it occurs. + +from typing import Any, Dict, cast + +from mypy.nodes import (MypyFile, SymbolTable, SymbolTableNode, + TypeInfo, FuncDef, OverloadedFuncDef, Var, LDEF, MDEF, GDEF, MODULE_REF) -from mypy.types import Instance, CallableType +from mypy.types import Instance, CallableType, TypeVisitor +from mypy.visitor import NodeVisitor + + +def fixup_symbol_table(symtab: SymbolTable, modules: Dict[str, MypyFile], + info: TypeInfo = None, visitor: 'NodeFixer' = None) -> None: + if visitor is None: + visitor = NodeFixer(modules) + for key, value in symtab.items(): + if value.kind in (LDEF, MDEF, GDEF): + if isinstance(value.node, TypeInfo): + print('Descending', value.node.fullname()) + if value.node.names is not None: + fixup_symbol_table(value.node.names, modules, value.node, visitor) + if value.node._promote is not None: + value.node._promote.accept(visitor.type_fixer) + print("Calculating mro for", value.node.fullname()) + value.node.calculate_mro() + elif value.node is not None: + value.node.accept(visitor) + elif value.kind == MODULE_REF: + if value.module_ref not in modules: + print('*** Cannot find module', value.module_ref, 'needed for patch-up') + return + value.node = modules[value.module_ref] + # print('Fixed up module ref to', value.module_ref) + # TODO: Other kinds? + + +class NodeFixer(NodeVisitor[None]): + def __init__(self, modules: Dict[str, MypyFile]) -> None: + self.modules = modules + self.type_fixer = TypeFixer(self.modules) + + def visit_func_def(self, func: FuncDef) -> None: + if func.type is not None: + func.type.accept(self.type_fixer) + for arg in func.arguments: + if arg.type_annotation is not None: + arg.type_annotation.accept(self.type_fixer) + # TODO: Also fix up func.info here? + + def visit_overloaded_func_def(self, over: OverloadedFuncDef) -> None: + # TODO: Fix up func.info here? + pass + + def visit_var(self, v: Var) -> None: + if v.type is not None: + v.type.accept(self.type_fixer) + + +class TypeFixer(TypeVisitor[None]): + def __init__(self, modules: Dict[str, MypyFile]) -> None: + self.modules = modules + + def visit_instance(self, inst: Instance) -> None: + # TODO: Combine Instances that are exactly the same? + type_ref = inst.type_ref + if type_ref is not None: + del inst.type_ref + stnode =lookup_qualified(type_ref, self.modules) + if stnode is not None and isinstance(stnode.node, TypeInfo): + inst.type = stnode.node + + # TODO: Why are these abstract? 
+ + def visit_any(self, o: Any) -> None: + pass + + def visit_callable_type(self, o: Any) -> None: + pass + + def visit_deleted_type(self, o: Any) -> None: + pass + + def visit_none_type(self, o: Any) -> None: + pass + + def visit_partial_type(self, o: Any) -> None: + pass + + def visit_tuple_type(self, o: Any) -> None: + pass + + def visit_type_var(self, o: Any) -> None: + pass + + def visit_unbound_type(self, o: Any) -> None: + pass + + def visit_union_type(self, o: Any) -> None: + pass + + def visit_void(self, o: Any) -> None: + pass def lookup_qualified(name: str, modules: Dict[str, MypyFile]) -> SymbolTableNode: @@ -29,34 +130,3 @@ def lookup_qualified(name: str, modules: Dict[str, MypyFile]) -> SymbolTableNode if res is None: print('*** Cannot find', parts[-1], 'in module', parts[:-1]) return res - - -def fixup_symbol_table(symtab: SymbolTable, modules: Dict[str, MypyFile]) -> None: - for key, value in symtab.items(): - if value.kind in (LDEF, MDEF, GDEF): - if isinstance(value.node, Var): - fixup_var(value.node, modules) - elif value.kind == MODULE_REF: - if value.module_ref not in modules: - print('*** Cannot find module', value.module_ref, 'needed for patch-up') - return - value.node = modules[value.module_ref] - # print('Fixed up module ref to', value.module_ref) - - -def fixup_var(node: Var, modules: Dict[str, MypyFile]) -> None: - if isinstance(node.type, Instance): - if isinstance(node.type.type, TypeInfo): - if node.type.type.is_dummy: - fn = node.type.type.fullname() - stnode = lookup_qualified(fn, modules) - if stnode is None: - print('*** Cannot find', fn, 'needed to fix up', node) - return - assert stnode is not None and stnode.kind == GDEF, stnode - if isinstance(stnode.node, TypeInfo): - node.type.type = stnode.node - # print('Fixed up type for', node, 'from', stnode.node.fullname()) - else: - assert False, stnode.node - return diff --git a/mypy/nodes.py b/mypy/nodes.py index a9b8c399ffcf..e9084852f50b 100644 --- a/mypy/nodes.py +++ b/mypy/nodes.py @@ -451,7 +451,6 @@ def serialize(self) -> JsonDict: 'name': self._name, 'arguments': [a.serialize() for a in self.arguments], 'type': None if self.type is None else self.type.serialize(), - # TODO: type } @classmethod @@ -1785,10 +1784,7 @@ def __str__(self) -> str: ('Names', sorted(self.names.keys()))], 'TypeInfo') - def serialize(self, full=False) -> Union[str, JsonDict]: - fn = self.fullname() - if fn and not full: - return fn + def serialize(self) -> Union[str, JsonDict]: data = {'.class': 'TypeInfo', 'name': self.name(), 'fullname': self.fullname(), @@ -1807,35 +1803,26 @@ def serialize(self, full=False) -> Union[str, JsonDict]: return data @classmethod - def deserialize(cls, data: Union[str, JsonDict]) -> 'TypeInfo': - if isinstance(data, str): - fullname = data - name = fullname.rsplit('.', 1)[-1] - names = SymbolTable() - else: - fullname = data['fullname'] - name = data['name'] - names = SymbolTable.deserialize(data['names']) + def deserialize(cls, data: JsonDict) -> 'TypeInfo': + fullname = data['fullname'] + name = data['name'] + names = SymbolTable.deserialize(data['names']) cdef = ClassDef(name, Block([])) cdef.fullname = fullname ti = TypeInfo(names, cdef) ti._fullname = fullname - if isinstance(data, str): - ti.is_dummy = True - else: - ti.subtypes = {TypeInfo.deserialize(t) for t in data['subtypes']} - ti.is_abstract = data['is_abstract'] - ti.abstract_attributes = data['abstract_attributes'] - ti.is_enum = data['is_enum'] - ti.fallback_to_any = data['fallback_to_any'] - ti.type_vars = data['type_vars'] - 
ti.bases = [mypy.types.Instance.deserialize(b) for b in data['bases']] - ti._promote = (None if data['_promote'] is None - else mypy.types.Type.deserialize(data['_promote'])) - ti.tuple_type = (None if data['tuple_type'] is None - else mypy.types.TupleType.deserialize(data['tuple_type'])) - ti.is_named_tuple = data['is_named_tuple'] - ti.calculate_mro() + ti.subtypes = {TypeInfo.deserialize(t) for t in data['subtypes']} + ti.is_abstract = data['is_abstract'] + ti.abstract_attributes = data['abstract_attributes'] + ti.is_enum = data['is_enum'] + ti.fallback_to_any = data['fallback_to_any'] + ti.type_vars = data['type_vars'] + ti.bases = [mypy.types.Instance.deserialize(b) for b in data['bases']] + ti._promote = (None if data['_promote'] is None + else mypy.types.Type.deserialize(data['_promote'])) + ti.tuple_type = (None if data['tuple_type'] is None + else mypy.types.TupleType.deserialize(data['tuple_type'])) + ti.is_named_tuple = data['is_named_tuple'] return ti @@ -1918,7 +1905,7 @@ def serialize(self) -> JsonDict: data['node'] = self.node.serialize() else: if isinstance(self.node, TypeInfo): - data['node'] = self.node.serialize(True) + data['node'] = self.node.serialize() typ = self.type if typ is not None: print('XXX Huh?', typ, 'for', self.node._fullname) diff --git a/mypy/types.py b/mypy/types.py index a1445c7af83e..a603107660a0 100644 --- a/mypy/types.py +++ b/mypy/types.py @@ -260,11 +260,13 @@ def __init__(self, typ: mypy.nodes.TypeInfo, args: List[Type], def accept(self, visitor: 'TypeVisitor[T]') -> T: return visitor.visit_instance(self) + type_ref = None # type: str + def serialize(self) -> JsonDict: data = {'.class': 'Instance', } # type: JsonDict if self.type is not None: - data['type'] = self.type.serialize() + data['type_ref'] = self.type.fullname() if self.args: data['args'] = [arg.serialize() for arg in self.args] if self.erased: @@ -274,15 +276,16 @@ def serialize(self) -> JsonDict: @classmethod def deserialize(cls, data: JsonDict) -> 'Instance': assert data['.class'] == 'Instance' - typ = None - if 'type' in data: - typ = mypy.nodes.TypeInfo.deserialize(data['type']) args = [] # type: List[Type] if 'args' in data: args_list = data['args'] assert isinstance(args_list, list) args = [Type.deserialize(arg) for arg in args_list] - return Instance(typ, args, erased=data.get('erased', False)) + inst = Instance(None, args, erased=data.get('erased', False)) + if 'type_ref' in data: + inst.type_ref = data['type_ref'] + # Will be fixed up by fixup.py later. + return inst class TypeVarType(Type): From bf291abb243bdd392c4229215abc1863006635d8 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Mon, 22 Feb 2016 08:47:19 -0800 Subject: [PATCH 012/117] Fixing the fixup code. Too much debugging prints though. 
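For orientation, a minimal standalone sketch of the two-phase scheme these patches implement: on dump, a cross-module reference is written as just a fully qualified name (the "type_ref"); on load, a placeholder keeps that name until all modules are present, and a fixup pass swaps the name for the real object via a qualified lookup. The names Ref, ClassInfo and fixup() below are illustrative only and are not mypy's actual classes.

    from typing import Dict, Optional


    class ClassInfo:
        """Stand-in for TypeInfo: a class known by its fully qualified name."""
        def __init__(self, fullname: str) -> None:
            self.fullname = fullname


    class Ref:
        """Stand-in for Instance: serialized as just the target's fullname."""
        def __init__(self, target: ClassInfo = None) -> None:
            self.target = target
            self.type_ref = None  # type: Optional[str]

        def serialize(self) -> dict:
            return {'.class': 'Ref', 'type_ref': self.target.fullname}

        @classmethod
        def deserialize(cls, data: dict) -> 'Ref':
            ref = Ref()
            ref.type_ref = data['type_ref']  # Resolved later by fixup().
            return ref


    def fixup(ref: Ref, modules: Dict[str, Dict[str, ClassInfo]]) -> None:
        # Qualified lookup: 'pkg.mod.Cls' -> modules['pkg.mod']['Cls'].
        modname, _, name = ref.type_ref.rpartition('.')
        ref.target = modules[modname][name]
        ref.type_ref = None


    modules = {'lib.things': {'Thing': ClassInfo('lib.things.Thing')}}
    blob = Ref(modules['lib.things']['Thing']).serialize()
    loaded = Ref.deserialize(blob)  # Only the name survives the round trip.
    fixup(loaded, modules)          # Now it points at the real object again.
    assert loaded.target is modules['lib.things']['Thing']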
--- mypy/build.py | 6 +++--- mypy/fixup.py | 59 +++++++++++++++++++++++++++++++++++++-------------- mypy/nodes.py | 2 ++ mypy/types.py | 4 ++-- 4 files changed, 50 insertions(+), 21 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index 6667952e7a8c..17e1268baf3a 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -620,11 +620,11 @@ def maybe_make_cached_state(self, id: str, path: str) -> Optional['UnprocessedBa def log(self, message: str) -> None: if VERBOSE in self.flags: - print('LOG:', message, file=sys.stderr) + print('LOG:', message, file=sys.stderr, flush=True) def trace(self, message: str) -> None: if self.flags.count(VERBOSE) >= 2: - print('TRACE:', message, file=sys.stderr) + print('TRACE:', message, file=sys.stderr, flush=True) def remove_cwd_prefix_from_path(p: str) -> str: @@ -939,7 +939,7 @@ def process(self) -> None: """ # Fix up various things in the symbol tables. print() - print('Fixing up', self.id) + print('FIXING MODULE', self.id) fixup.fixup_symbol_table(self.tree.names, self.semantic_analyzer().modules) # TODO: For import cycles, if not everything was fixed up, # stay in this state and try again later (or move to one extra diff --git a/mypy/fixup.py b/mypy/fixup.py index 487cf5c86818..d80391f7251c 100644 --- a/mypy/fixup.py +++ b/mypy/fixup.py @@ -16,21 +16,15 @@ def fixup_symbol_table(symtab: SymbolTable, modules: Dict[str, MypyFile], - info: TypeInfo = None, visitor: 'NodeFixer' = None) -> None: - if visitor is None: - visitor = NodeFixer(modules) + info: TypeInfo = None) -> None: + node_fixer = NodeFixer(modules, info) for key, value in symtab.items(): if value.kind in (LDEF, MDEF, GDEF): if isinstance(value.node, TypeInfo): - print('Descending', value.node.fullname()) - if value.node.names is not None: - fixup_symbol_table(value.node.names, modules, value.node, visitor) - if value.node._promote is not None: - value.node._promote.accept(visitor.type_fixer) - print("Calculating mro for", value.node.fullname()) - value.node.calculate_mro() + # TypeInfo has no accept(). TODO: Add it? + node_fixer.visit_type_info(value.node) elif value.node is not None: - value.node.accept(visitor) + value.node.accept(node_fixer) elif value.kind == MODULE_REF: if value.module_ref not in modules: print('*** Cannot find module', value.module_ref, 'needed for patch-up') @@ -41,23 +35,55 @@ def fixup_symbol_table(symtab: SymbolTable, modules: Dict[str, MypyFile], class NodeFixer(NodeVisitor[None]): - def __init__(self, modules: Dict[str, MypyFile]) -> None: + def __init__(self, modules: Dict[str, MypyFile], info: TypeInfo = None) -> None: self.modules = modules self.type_fixer = TypeFixer(self.modules) + self.current_info = info + + # NOTE: This method isn't (yet) part of the NodeVisitor API. 
+ def visit_type_info(self, info: TypeInfo) -> None: + save_info = self.current_info + try: + self.current_info = info + print('Descending', info.fullname()) + if info.names is not None: + fixup_symbol_table(info.names, self.modules, info) + print('Fixing up', info.fullname()) + if info.subtypes is not None: + for st in info.subtypes: + self.visit_type_info(st) + if info.bases is not None: + for base in info.bases: + base.accept(self.type_fixer) + if info._promote is not None: + info._promote.accept(self.type_fixer) + if info.tuple_type is not None: + info.tuple_type.accept(self.type_fixer) + print("Calculating mro for", info.fullname()) + info.calculate_mro() + print("MRO for", info.fullname(), info.mro) + finally: + self.current_info = save_info def visit_func_def(self, func: FuncDef) -> None: + if self.current_info is not None: + print(' Setting', repr(func), 'info to', repr(self.current_info)) + func.info = self.current_info + else: + print(' No info for', func) if func.type is not None: func.type.accept(self.type_fixer) for arg in func.arguments: if arg.type_annotation is not None: arg.type_annotation.accept(self.type_fixer) - # TODO: Also fix up func.info here? - def visit_overloaded_func_def(self, over: OverloadedFuncDef) -> None: - # TODO: Fix up func.info here? - pass + def visit_overloaded_func_def(self, func: OverloadedFuncDef) -> None: + if self.current_info is not None: + func.info = self.current_info def visit_var(self, v: Var) -> None: + if self.current_info is not None: + v.info = self.current_info if v.type is not None: v.type.accept(self.type_fixer) @@ -70,6 +96,7 @@ def visit_instance(self, inst: Instance) -> None: # TODO: Combine Instances that are exactly the same? type_ref = inst.type_ref if type_ref is not None: + print("Fixing instance", type_ref) del inst.type_ref stnode =lookup_qualified(type_ref, self.modules) if stnode is not None and isinstance(stnode.node, TypeInfo): diff --git a/mypy/nodes.py b/mypy/nodes.py index e9084852f50b..ca722564a402 100644 --- a/mypy/nodes.py +++ b/mypy/nodes.py @@ -2032,9 +2032,11 @@ class MroError(Exception): def linearize_hierarchy(info: TypeInfo) -> List[TypeInfo]: # TODO describe + print('Linearize', repr(info)) if info.mro: return info.mro bases = info.direct_base_classes() + if None in bases: print('SORRY!', repr(info), info.bases, bases); return [info] return [info] + merge([linearize_hierarchy(base) for base in bases] + [bases]) diff --git a/mypy/types.py b/mypy/types.py index a603107660a0..cc5575010a58 100644 --- a/mypy/types.py +++ b/mypy/types.py @@ -952,7 +952,7 @@ def visit_deleted_type(self, t): return "".format(t.source) def visit_instance(self, t): - s = t.type.fullname() + s = t.type.fullname() if t.type is not None else '' if t.erased: s += '*' if t.args != []: @@ -1011,7 +1011,7 @@ def visit_overloaded(self, t): def visit_tuple_type(self, t): s = self.list_str(t.items) - if t.fallback: + if t.fallback and t.fallback.type: fallback_name = t.fallback.type.fullname() if fallback_name != 'builtins.tuple': return 'Tuple[{}, fallback={}]'.format(s, t.fallback.accept(self)) From a182fd73283ebcae8b43d8357b2f6c3cd4c0060b Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Mon, 22 Feb 2016 12:04:34 -0800 Subject: [PATCH 013/117] Less debug output. The type fixer should now be (mostly) complete. 
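Since the MRO is no longer serialized, visit_type_info() ends by calling calculate_mro(), which relies on C3 linearization over the fixed-up base classes. As a reference point, here is a hedged standalone sketch of that algorithm over plain class names; c3_merge(), linearize() and the toy hierarchy dict are illustrative only and are not mypy's implementation.

    from typing import Dict, List


    def c3_merge(seqs: List[List[str]]) -> List[str]:
        result = []  # type: List[str]
        seqs = [s[:] for s in seqs if s]
        while seqs:
            for seq in seqs:
                head = seq[0]
                # A head is usable only if no other sequence has it in its tail.
                if not any(head in other[1:] for other in seqs):
                    break
            else:
                raise TypeError('inconsistent hierarchy')
            result.append(head)
            seqs = [[x for x in s if x != head] for s in seqs]
            seqs = [s for s in seqs if s]
        return result


    def linearize(cls: str, bases: Dict[str, List[str]]) -> List[str]:
        # mro(C) = C followed by the C3 merge of its bases' MROs and the bases.
        return c3_merge([[cls]] +
                        [linearize(b, bases) for b in bases[cls]] +
                        [bases[cls][:]])


    # Classic diamond: class D(B, C), B(A), C(A), A(object).
    hierarchy = {'object': [], 'A': ['object'], 'B': ['A'], 'C': ['A'],
                 'D': ['B', 'C']}
    assert linearize('D', hierarchy) == ['D', 'B', 'C', 'A', 'object']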
--- mypy/fixup.py | 63 +++++++++++++++++++++++++++++---------------------- mypy/nodes.py | 1 - 2 files changed, 36 insertions(+), 28 deletions(-) diff --git a/mypy/fixup.py b/mypy/fixup.py index d80391f7251c..189fdbfdc692 100644 --- a/mypy/fixup.py +++ b/mypy/fixup.py @@ -11,7 +11,7 @@ from mypy.nodes import (MypyFile, SymbolTable, SymbolTableNode, TypeInfo, FuncDef, OverloadedFuncDef, Var, LDEF, MDEF, GDEF, MODULE_REF) -from mypy.types import Instance, CallableType, TypeVisitor +from mypy.types import Instance, CallableType, TupleType, TypeVarType, UnionType, TypeVisitor from mypy.visitor import NodeVisitor @@ -45,10 +45,10 @@ def visit_type_info(self, info: TypeInfo) -> None: save_info = self.current_info try: self.current_info = info - print('Descending', info.fullname()) + # print('Descending into', info.fullname()) if info.names is not None: fixup_symbol_table(info.names, self.modules, info) - print('Fixing up', info.fullname()) + # print('Fixing up', info.fullname()) if info.subtypes is not None: for st in info.subtypes: self.visit_type_info(st) @@ -59,18 +59,15 @@ def visit_type_info(self, info: TypeInfo) -> None: info._promote.accept(self.type_fixer) if info.tuple_type is not None: info.tuple_type.accept(self.type_fixer) - print("Calculating mro for", info.fullname()) + # print('Calculating mro for', info.fullname()) info.calculate_mro() - print("MRO for", info.fullname(), info.mro) + # print('MRO for', info.fullname(), info.mro) finally: self.current_info = save_info def visit_func_def(self, func: FuncDef) -> None: if self.current_info is not None: - print(' Setting', repr(func), 'info to', repr(self.current_info)) func.info = self.current_info - else: - print(' No info for', func) if func.type is not None: func.type.accept(self.type_fixer) for arg in func.arguments: @@ -96,43 +93,55 @@ def visit_instance(self, inst: Instance) -> None: # TODO: Combine Instances that are exactly the same? type_ref = inst.type_ref if type_ref is not None: - print("Fixing instance", type_ref) del inst.type_ref stnode =lookup_qualified(type_ref, self.modules) if stnode is not None and isinstance(stnode.node, TypeInfo): inst.type = stnode.node - # TODO: Why are these abstract? - def visit_any(self, o: Any) -> None: - pass + pass # Nothing to descend into. - def visit_callable_type(self, o: Any) -> None: - pass + def visit_callable_type(self, ct: CallableType) -> None: + if ct.arg_types: + for argt in ct.arg_types: + argt.accept(self) + if ct.ret_type is not None: + ct.ret_type.accept(self) + # TODO: What to do with ct.variables? def visit_deleted_type(self, o: Any) -> None: - pass + pass # Nothing to descend into. def visit_none_type(self, o: Any) -> None: - pass + pass # Nothing to descend into. 
def visit_partial_type(self, o: Any) -> None: - pass - - def visit_tuple_type(self, o: Any) -> None: - pass - - def visit_type_var(self, o: Any) -> None: - pass + raise RuntimeError("Shouldn't get here", o) + + def visit_tuple_type(self, tt: TupleType) -> None: + if tt.items: + for it in tt.items: + it.accept(self) + if tt.fallback is not None: + tt.fallback.accept(self) + + def visit_type_var(self, tvt: TypeVarType) -> None: + if tvt.values: + for vt in tvt.values: + vt.accept(self) + if tvt.upper_bound is not None: + tvt.upper_bound.accept(self) def visit_unbound_type(self, o: Any) -> None: - pass + raise RuntimeError("Shouldn't get here", o) - def visit_union_type(self, o: Any) -> None: - pass + def visit_union_type(self, ut: UnionType) -> None: + if ut.items: + for it in ut.items: + it.accept(self) def visit_void(self, o: Any) -> None: - pass + pass # Nothing to descend into. def lookup_qualified(name: str, modules: Dict[str, MypyFile]) -> SymbolTableNode: diff --git a/mypy/nodes.py b/mypy/nodes.py index ca722564a402..3e8f0e83633b 100644 --- a/mypy/nodes.py +++ b/mypy/nodes.py @@ -2032,7 +2032,6 @@ class MroError(Exception): def linearize_hierarchy(info: TypeInfo) -> List[TypeInfo]: # TODO describe - print('Linearize', repr(info)) if info.mro: return info.mro bases = info.direct_base_classes() From e6d7f5cb80681abc74cafd48c3f78762d011c2cf Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Mon, 22 Feb 2016 15:41:22 -0800 Subject: [PATCH 014/117] Lots more debugging around MRO calculation. Fixed a bug in serialization of OverloadedFuncDef (type was missing). --- mypy/fixup.py | 7 +++++-- mypy/nodes.py | 32 +++++++++++++++++++++++--------- mypy/semanal.py | 5 ++--- mypy/types.py | 2 +- 4 files changed, 31 insertions(+), 15 deletions(-) diff --git a/mypy/fixup.py b/mypy/fixup.py index 189fdbfdc692..a4a79a4fdfa3 100644 --- a/mypy/fixup.py +++ b/mypy/fixup.py @@ -59,9 +59,9 @@ def visit_type_info(self, info: TypeInfo) -> None: info._promote.accept(self.type_fixer) if info.tuple_type is not None: info.tuple_type.accept(self.type_fixer) - # print('Calculating mro for', info.fullname()) info.calculate_mro() - # print('MRO for', info.fullname(), info.mro) + if info.mro is None: + print('*** No MRO calculated for', info.fullname()) finally: self.current_info = save_info @@ -104,6 +104,9 @@ def visit_any(self, o: Any) -> None: def visit_callable_type(self, ct: CallableType) -> None: if ct.arg_types: for argt in ct.arg_types: + if argt is None: + import pdb # type: ignore + pdb.set_trace() argt.accept(self) if ct.ret_type is not None: ct.ret_type.accept(self) diff --git a/mypy/nodes.py b/mypy/nodes.py index 3e8f0e83633b..c95ec63b39ff 100644 --- a/mypy/nodes.py +++ b/mypy/nodes.py @@ -309,12 +309,16 @@ def accept(self, visitor: NodeVisitor[T]) -> T: def serialize(self) -> JsonDict: return {'.class': 'OverloadedFuncDef', 'items': [i.serialize() for i in self.items], + 'type': None if self.type is None else self.type.serialize(), } @classmethod def deserialize(cls, data: JsonDict) -> 'OverloadedFuncDef': assert data['.class'] == 'OverloadedFuncDef' - return OverloadedFuncDef([Decorator.deserialize(d) for d in data['items']]) + res = OverloadedFuncDef([Decorator.deserialize(d) for d in data['items']]) + if data.get('type') is not None: + res.type = mypy.types.Type.deserialize(data['type']) + return res class Argument(Node): @@ -1739,16 +1743,19 @@ def calculate_mro(self) -> None: Raise MroError if cannot determine mro. 
""" - self.mro = linearize_hierarchy(self) + mro = linearize_hierarchy(self) + if mro is None: return # TODO: Or raise MroError()? + self.mro = mro def has_base(self, fullname: str) -> bool: """Return True if type has a base type with the specified name. This can be either via extension or via implementation. """ - for cls in self.mro: - if cls.fullname() == fullname: - return True + if self.mro: + for cls in self.mro: + if cls.fullname() == fullname: + return True return False def all_subtypes(self) -> 'Set[TypeInfo]': @@ -2030,14 +2037,21 @@ class MroError(Exception): """Raised if a consistent mro cannot be determined for a class.""" -def linearize_hierarchy(info: TypeInfo) -> List[TypeInfo]: +def linearize_hierarchy(info: TypeInfo) -> Optional[List[TypeInfo]]: # TODO describe if info.mro: return info.mro bases = info.direct_base_classes() - if None in bases: print('SORRY!', repr(info), info.bases, bases); return [info] - return [info] + merge([linearize_hierarchy(base) for base in bases] + - [bases]) + lin_bases = [] + for base in bases: + if base is None: + return None + more_bases = linearize_hierarchy(base) + if more_bases is None: + return None + lin_bases.append(more_bases) + lin_bases.append(bases) + return [info] + merge(lin_bases) def merge(seqs: List[List[TypeInfo]]) -> List[TypeInfo]: diff --git a/mypy/semanal.py b/mypy/semanal.py index 53839d7798c4..6ee3d40318ef 100644 --- a/mypy/semanal.py +++ b/mypy/semanal.py @@ -1810,8 +1810,7 @@ def visit_member_expr(self, expr: MemberExpr) -> None: if isinstance(base, RefExpr) and cast(RefExpr, base).kind == MODULE_REF: file = cast(MypyFile, cast(RefExpr, base).node) - names = file.names - n = names.get(expr.name, None) + n = file.names.get(expr.name, None) if file is not None else None if n: n = self.normalize_type_alias(n, expr) if not n: @@ -1827,7 +1826,7 @@ def visit_member_expr(self, expr: MemberExpr) -> None: # one type checker run. If we reported errors here, # the build would terminate after semantic analysis # and we wouldn't be able to report any type errors. - full_name = '%s.%s' % (file.fullname(), expr.name) + full_name = '%s.%s' % (file.fullname() if file is not None else None, expr.name) if full_name in obsolete_name_mapping: self.fail("Module has no attribute %r (it's now called %r)" % ( expr.name, obsolete_name_mapping[full_name]), expr) diff --git a/mypy/types.py b/mypy/types.py index cc5575010a58..6793ad9dce0f 100644 --- a/mypy/types.py +++ b/mypy/types.py @@ -453,7 +453,7 @@ def copy_modified(self, ) def is_type_obj(self) -> bool: - return self.fallback.type.fullname() == 'builtins.type' + return self.fallback.type is not None and self.fallback.type.fullname() == 'builtins.type' def type_object(self) -> mypy.nodes.TypeInfo: assert self.is_type_obj() From 7f2e698e84ae139e706d7083ca0732a9a9869f98 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Mon, 22 Feb 2016 16:26:19 -0800 Subject: [PATCH 015/117] Fix a tricky issue where MRO linearization failed for the builtins/typing/abc cluster. --- mypy/fixup.py | 6 ++++++ mypy/nodes.py | 1 + 2 files changed, 7 insertions(+) diff --git a/mypy/fixup.py b/mypy/fixup.py index a4a79a4fdfa3..0008cdb7e7dd 100644 --- a/mypy/fixup.py +++ b/mypy/fixup.py @@ -97,6 +97,12 @@ def visit_instance(self, inst: Instance) -> None: stnode =lookup_qualified(type_ref, self.modules) if stnode is not None and isinstance(stnode.node, TypeInfo): inst.type = stnode.node + if inst.type.bases: + # Also fix up the bases, just in case. 
+ for base in inst.type.bases: + if base.type is None: + base.accept(self) + def visit_any(self, o: Any) -> None: pass # Nothing to descend into. diff --git a/mypy/nodes.py b/mypy/nodes.py index c95ec63b39ff..ff9e25de340b 100644 --- a/mypy/nodes.py +++ b/mypy/nodes.py @@ -2045,6 +2045,7 @@ def linearize_hierarchy(info: TypeInfo) -> Optional[List[TypeInfo]]: lin_bases = [] for base in bases: if base is None: + print('*** Cannot linearize bases for', info.fullname(), bases) return None more_bases = linearize_hierarchy(base) if more_bases is None: From 90338c52ad3d9161652865d9c07aaa63e5a84469 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 23 Feb 2016 10:32:24 -0800 Subject: [PATCH 016/117] Improved (de)serialization of TypeInfo. Multi-pass fixup algorithm. --- mypy/build.py | 39 +++++++++++++++++++++++++++++++-------- mypy/fixup.py | 27 ++++++++++++++++++++++++--- mypy/nodes.py | 36 +++++++++++++++++++++++++++++------- 3 files changed, 84 insertions(+), 18 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index 17e1268baf3a..7287be932f81 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -68,6 +68,8 @@ PROBABLY_CACHED_STATE = 1 # We've loaded the module from cache. CACHE_LOADED_STATE = 2 +# We've patched up cross-references. +CACHE_PATCHED_STATE = 3 # The source file has a state object, but we haven't done anything with it yet. UNPROCESSED_STATE = 11 # We've parsed the source file. @@ -933,23 +935,44 @@ def __init__(self, info: StateInfo, meta: CacheMeta, data: Any) -> None: self.manager.semantic_analyzer.modules[self.id] = self.tree def process(self) -> None: - """Transition directly to TypeCheckedFile. + """Transition to the next stage (CachePatchedFile).f This patches up cross-references. """ # Fix up various things in the symbol tables. - print() - print('FIXING MODULE', self.id) - fixup.fixup_symbol_table(self.tree.names, self.semantic_analyzer().modules) - # TODO: For import cycles, if not everything was fixed up, - # stay in this state and try again later (or move to one extra - # state, if two passes are always enough). + print() # TODO : Reduce debug prints + print('FIXING MODULE PASS ONE', self.id) + fixup.fixup_module_pass_one(self.tree, self.semantic_analyzer().modules) + file = CachePatchedFile(self.info(), self.tree, self.meta) + self.switch_state(file) + + def state(self) -> int: + return CACHE_LOADED_STATE + +class CachePatchedFile(State): + def __init__(self, info: StateInfo, tree: MypyFile, meta: CacheMeta) -> None: + super().__init__(info) + self.tree = tree + self.meta = meta + self.dependencies.extend(meta.dependencies) + if self.id != 'builtins': + self.dependencies.append('builtins') # Even cached modules need this. + + def process(self) -> None: + """Transition directly to TypeCheckedFile. + + This calculates the MROs for all classes. + """ + # Fix up various things in the symbol tables. 
+ print() + print('FIXING MODULE PASS TWO', self.id) + fixup.fixup_module_pass_two(self.tree, self.semantic_analyzer().modules) file = TypeCheckedFile(self.info(), self.tree, self.meta) self.switch_state(file) def state(self) -> int: - return CACHE_LOADED_STATE + return CACHE_PATCHED_STATE class UnprocessedFile(UnprocessedBase): diff --git a/mypy/fixup.py b/mypy/fixup.py index 0008cdb7e7dd..9109743a47a5 100644 --- a/mypy/fixup.py +++ b/mypy/fixup.py @@ -15,6 +15,29 @@ from mypy.visitor import NodeVisitor +def fixup_module_pass_one(tree: MypyFile, modules: Dict[str, MypyFile]) -> None: + assert modules[tree.fullname()] is tree + fixup_symbol_table(tree.names, modules) + print('Done pass 1', tree.fullname()) + + +def fixup_module_pass_two(tree: MypyFile, modules: Dict[str, MypyFile]) -> None: + assert modules[tree.fullname()] is tree + compute_all_mros(tree.names, modules) + print('Done pass 2', tree.fullname()) + + +def compute_all_mros(symtab: SymbolTable, modules: Dict[str, MypyFile]) -> None: + for key, value in symtab.items(): + if value.kind in (LDEF, MDEF, GDEF) and isinstance(value.node, TypeInfo): + info = value.node + print(' Calc MRO for', info.fullname()) + info.calculate_mro() + if not info.mro: + print('*** No MRO calculated for', info.fullname()) + compute_all_mros(info.names, modules) + + def fixup_symbol_table(symtab: SymbolTable, modules: Dict[str, MypyFile], info: TypeInfo = None) -> None: node_fixer = NodeFixer(modules, info) @@ -34,6 +57,7 @@ def fixup_symbol_table(symtab: SymbolTable, modules: Dict[str, MypyFile], # TODO: Other kinds? +# TODO: FIx up .info when deserializing, i.e. much earlier. class NodeFixer(NodeVisitor[None]): def __init__(self, modules: Dict[str, MypyFile], info: TypeInfo = None) -> None: self.modules = modules @@ -59,9 +83,6 @@ def visit_type_info(self, info: TypeInfo) -> None: info._promote.accept(self.type_fixer) if info.tuple_type is not None: info.tuple_type.accept(self.type_fixer) - info.calculate_mro() - if info.mro is None: - print('*** No MRO calculated for', info.fullname()) finally: self.current_info = save_info diff --git a/mypy/nodes.py b/mypy/nodes.py index ff9e25de340b..1c54797363e2 100644 --- a/mypy/nodes.py +++ b/mypy/nodes.py @@ -609,6 +609,31 @@ def accept(self, visitor: NodeVisitor[T]) -> T: def is_generic(self) -> bool: return self.info.is_generic() + def serialize(self) -> JsonDict: + return {'.class': 'ClassDef', + 'name': self.name, + 'fullname': self.fullname, + 'type_vars': [v.serialize() for v in self.type_vars], + # TODO: base_types? + 'metaclass': self.metaclass, + # TODO: decorators? + 'is_builtinclass': self.is_builtinclass, + } + + @classmethod + def deserialize(self, data: JsonDict) -> 'ClassDef': + assert data['.class'] == 'ClassDef' + res = ClassDef(data['name'], + Block([]), + [mypy.types.TypeVarDef.deserialize(v) for v in data['type_vars']], + # TODO: base_types? + metaclass=data['metaclass'], + # TODO: decorators? 
+ ) + res.fullname = data['fullname'] + res.is_builtinclass = data['is_builtinclass'] + return res + class GlobalDecl(Node): """Declaration global x, y, ...""" @@ -1793,10 +1818,10 @@ def __str__(self) -> str: def serialize(self) -> Union[str, JsonDict]: data = {'.class': 'TypeInfo', - 'name': self.name(), 'fullname': self.fullname(), 'subtypes': [t.serialize() for t in self.subtypes], 'names': self.names.serialize(), + 'defn': self.defn.serialize(), 'is_abstract': self.is_abstract, 'abstract_attributes': self.abstract_attributes, 'is_enum': self.is_enum, @@ -1811,13 +1836,10 @@ def serialize(self) -> Union[str, JsonDict]: @classmethod def deserialize(cls, data: JsonDict) -> 'TypeInfo': - fullname = data['fullname'] - name = data['name'] names = SymbolTable.deserialize(data['names']) - cdef = ClassDef(name, Block([])) - cdef.fullname = fullname - ti = TypeInfo(names, cdef) - ti._fullname = fullname + defn = ClassDef.deserialize(data['defn']) + ti = TypeInfo(names, defn) + ti._fullname = data['fullname'] ti.subtypes = {TypeInfo.deserialize(t) for t in data['subtypes']} ti.is_abstract = data['is_abstract'] ti.abstract_attributes = data['abstract_attributes'] From c48ed178d3c154f3a467919c0cb004b503f1fbc9 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 24 Feb 2016 08:46:30 -0800 Subject: [PATCH 017/117] Remove redundant final_passes(); it was a no-op. --- mypy/build.py | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index 7287be932f81..d7fbaa891d6b 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -473,14 +473,6 @@ def process(self, initial_states: List['UnprocessedBase']) -> BuildResult: if self.errors.is_errors(): self.errors.raise_error() - # Collect a list of all files. - trees = [] # type: List[MypyFile] - for state in self.states: - trees.append(cast(ParsedFile, state).tree) - - # Perform any additional passes after type checking for all the files. - self.final_passes(trees, self.type_checker.type_map) - return BuildResult(self.semantic_analyzer.modules, self.type_checker.type_map) @@ -605,14 +597,6 @@ def is_module(self, id: str) -> bool: """Is there a file in the file system corresponding to module id?""" return find_module(id, self.lib_path) is not None - def final_passes(self, files: List[MypyFile], - types: Dict[Node, Type]) -> None: - """Perform the code generation passes for type checked files.""" - if self.target in [SEMANTIC_ANALYSIS, TYPE_CHECK]: - pass # Nothing to do. - else: - raise RuntimeError('Unsupported target %d' % self.target) - def maybe_make_cached_state(self, id: str, path: str) -> Optional['UnprocessedBase']: m = find_cache_meta(id, path, self) if m is None: From 2fcc2e02c39ea96637a223c27ce5da784fa7a00a Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 24 Feb 2016 09:59:55 -0800 Subject: [PATCH 018/117] Refactor build states, on our way to cycle resilience. 
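The idea is that a module coming from the cache should advance through the
same kind of small, resumable steps as a module parsed from source, one
State subclass per fixup phase. Roughly, using the state constants from
build.py (CACHE_WITH_MRO_STATE lands in the next patch; the list name
CACHED_PIPELINE below is only for illustration):

    CACHED_PIPELINE = [
        'ProbablyCachedFile',  # PROBABLY_CACHED_STATE: meta.json matched mtime/size
        'CacheLoadedFile',     # CACHE_LOADED_STATE: data.json deserialized into a MypyFile
        'CachePatchedFile',    # CACHE_PATCHED_STATE: cross-references fixed up (pass one)
        'CacheWithMroFile',    # CACHE_WITH_MRO_STATE: MROs calculated (pass two)
        'TypeCheckedFile',     # TYPE_CHECKED_STATE: final_state
    ]

Keeping the phases separate is what lets is_ready() hold a module back until
the whole transitive closure of its dependencies has reached at least the
same phase, which is the property needed to survive import cycles among
cached modules.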
--- mypy/build.py | 95 ++++++++++++++++++++++++++++++--------------------- 1 file changed, 56 insertions(+), 39 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index d7fbaa891d6b..89e03f0a883c 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -443,12 +443,12 @@ def process(self, initial_states: List['UnprocessedBase']) -> BuildResult: self.trace('done') break - # XXX - self.trace('STATES OF THE WORLD') - for s in self.states: - self.trace(' id=%-15s ready=%-5s deps=%d (%2d) %s' % - (s.id, s.is_ready(), s.num_incomplete_deps(), s.state(), s.dependencies)) - self.trace('') + if self.flags.count(VERBOSE) >= 3: + self.trace('STATES OF THE WORLD') + for s in self.states: + self.trace(' id=%-15s ready=%-5s deps=%d (%2d) %s' % + (s.id, s.is_ready(), s.num_incomplete_deps(), s.state(), s.dependencies)) + self.trace('') # Potentially output some debug information. self.trace('next {} ({})'.format(next.id, next.state())) @@ -606,11 +606,13 @@ def maybe_make_cached_state(self, id: str, path: str) -> Optional['UnprocessedBa def log(self, message: str) -> None: if VERBOSE in self.flags: - print('LOG:', message, file=sys.stderr, flush=True) + print('LOG:', message, file=sys.stderr) + sys.stderr.flush() def trace(self, message: str) -> None: if self.flags.count(VERBOSE) >= 2: - print('TRACE:', message, file=sys.stderr, flush=True) + print('TRACE:', message, file=sys.stderr) + sys.stderr.flush() def remove_cwd_prefix_from_path(p: str) -> str: @@ -703,19 +705,8 @@ def is_ready(self) -> bool: return True def num_incomplete_deps(self) -> int: - """Return the number of dependencies that are incomplete. - - Here complete means that their state is *later* than this module. - Cyclic dependencies are omitted to break cycles forcibly (and somewhat - arbitrarily). - """ - incomplete = 0 - for module in self.dependencies: - state = self.manager.module_state(module) - if (not earlier_state(self.state(), state) and - not self.manager.is_dep(module, self.id)): - incomplete += 1 - return incomplete + """Return the number of dependencies that are ready but incomplete.""" + return 0 # Does not matter in this state def state(self) -> int: raise RuntimeError('Not implemented') @@ -841,9 +832,10 @@ def load_dependencies(self): deps = self.meta.dependencies[:] if self.id != 'builtins' and 'builtins' not in deps: deps.append('builtins') # Even cached modules need this. - for dep_id in deps: - if self.import_module(dep_id): - self.dependencies.append(dep_id) + for dep_id in deps + super_packages(self.id): + if dep_id not in self.dependencies: + if self.import_module(dep_id): + self.dependencies.append(dep_id) # TODO: else fail(...) def process(self) -> None: @@ -864,12 +856,12 @@ def process(self) -> None: ok = True for dep_id in self.dependencies: state_obj = self.manager.lookup_state(dep_id) - if (isinstance(state_obj, CacheLoadedFile) or - isinstance(state_obj, ProbablyCachedFile)): + if isinstance(state_obj, + (ProbablyCachedFile, CacheLoadedFile, + CachePatchedFile, CacheWithMroFile)): + continue + if isinstance(state_obj, TypeCheckedFile) and state_obj.meta: continue - if isinstance(state_obj, TypeCheckedFile): - if state_obj.meta is not None or dep_id == 'builtins': - continue self.manager.log('Abandoning cached data for {} ' 'because {} changed ({})'.format(self.id, state_obj.id, state_obj.__class__.__name__)) @@ -905,12 +897,16 @@ def state(self) -> int: class CacheLoadedFile(State): + # TODO: Deserialize tree in caller? 
def __init__(self, info: StateInfo, meta: CacheMeta, data: Any) -> None: super().__init__(info) self.meta = meta self.dependencies.extend(meta.dependencies) if self.id != 'builtins': self.dependencies.append('builtins') # Even cached modules need this. + for dep_id in super_packages(self.id): + if dep_id not in self.dependencies: + self.dependencies.append(dep_id) # Deserialize the tree now. self.tree = MypyFile.deserialize(data) @@ -919,11 +915,7 @@ def __init__(self, info: StateInfo, meta: CacheMeta, data: Any) -> None: self.manager.semantic_analyzer.modules[self.id] = self.tree def process(self) -> None: - """Transition to the next stage (CachePatchedFile).f - - This patches up cross-references. - """ - # Fix up various things in the symbol tables. + """Patch up cross-references and Transition to CachePatchedFile.""" print() # TODO : Reduce debug prints print('FIXING MODULE PASS ONE', self.id) fixup.fixup_module_pass_one(self.tree, self.semantic_analyzer().modules) @@ -934,6 +926,7 @@ def state(self) -> int: return CACHE_LOADED_STATE +# TODO: Inherit from CacheLoadedFile? class CachePatchedFile(State): def __init__(self, info: StateInfo, tree: MypyFile, meta: CacheMeta) -> None: super().__init__(info) @@ -942,16 +935,25 @@ def __init__(self, info: StateInfo, tree: MypyFile, meta: CacheMeta) -> None: self.dependencies.extend(meta.dependencies) if self.id != 'builtins': self.dependencies.append('builtins') # Even cached modules need this. + for dep_id in super_packages(self.id): + if dep_id not in self.dependencies: + self.dependencies.append(dep_id) def process(self) -> None: - """Transition directly to TypeCheckedFile. - - This calculates the MROs for all classes. - """ - # Fix up various things in the symbol tables. + """Calculate all MROs and transition to CacheWithMroFile.""" print() print('FIXING MODULE PASS TWO', self.id) fixup.fixup_module_pass_two(self.tree, self.semantic_analyzer().modules) + file = CacheWithMroFile(self.info(), self.tree, self.meta) + self.switch_state(file) + + def state(self) -> int: + return CACHE_PATCHED_STATE + + +class CacheWithMroFile(CachePatchedFile): + def process(self) -> None: + """Transition to TypeCheckedFile.""" file = TypeCheckedFile(self.info(), self.tree, self.meta) self.switch_state(file) @@ -1097,6 +1099,21 @@ def process(self) -> None: self.switch_state(PartiallySemanticallyAnalyzedFile(self.info(), self.tree)) + def num_incomplete_deps(self) -> int: + """Return the number of dependencies that are incomplete. + + Here complete means that their state is *later* than this module. + Cyclic dependencies are omitted to break cycles forcibly (and somewhat + arbitrarily). + """ + incomplete = 0 + for module in self.dependencies: + state = self.manager.module_state(module) + if (not earlier_state(self.state(), state) and + not self.manager.is_dep(module, self.id)): + incomplete += 1 + return incomplete + def state(self) -> int: return PARSED_STATE From 7a83a16bd05c8a29bc7fbe03cac85448851fe298 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 24 Feb 2016 16:17:17 -0800 Subject: [PATCH 019/117] Various small cleanups. --- mypy/build.py | 22 +++++++++++----------- mypy/fixup.py | 8 ++++++++ 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index 89e03f0a883c..46cdea670b66 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -70,6 +70,8 @@ CACHE_LOADED_STATE = 2 # We've patched up cross-references. CACHE_PATCHED_STATE = 3 +# We've calculated MROs. 
+CACHE_WITH_MRO_STATE = 4 # The source file has a state object, but we haven't done anything with it yet. UNPROCESSED_STATE = 11 # We've parsed the source file. @@ -890,8 +892,6 @@ def process(self) -> None: file = UnprocessedFile(self.info(), text) self.switch_state(file) - # TODO: is_ready() that waits for dependencies to be out of limbo - def state(self) -> int: return PROBABLY_CACHED_STATE @@ -912,7 +912,7 @@ def __init__(self, info: StateInfo, meta: CacheMeta, data: Any) -> None: self.tree = MypyFile.deserialize(data) # Store the parsed module in the shared module symbol table. - self.manager.semantic_analyzer.modules[self.id] = self.tree + self.semantic_analyzer().modules[self.id] = self.tree def process(self) -> None: """Patch up cross-references and Transition to CachePatchedFile.""" @@ -958,7 +958,7 @@ def process(self) -> None: self.switch_state(file) def state(self) -> int: - return CACHE_PATCHED_STATE + return CACHE_WITH_MRO_STATE class UnprocessedFile(UnprocessedBase): @@ -984,20 +984,20 @@ def load_dependencies(self): def process(self) -> None: """Parse the file, store global names and advance to the next state.""" - if self.id in self.manager.semantic_analyzer.modules: + if self.id in self.semantic_analyzer().modules: self.fail(self.path, 1, "Duplicate module named '{}'".format(self.id)) return tree = self.parse(self.program_text, self.path) # Store the parsed module in the shared module symbol table. - self.manager.semantic_analyzer.modules[self.id] = tree + self.semantic_analyzer().modules[self.id] = tree if '.' in self.id: # Include module in the symbol table of the enclosing package. c = self.id.split('.') p = '.'.join(c[:-1]) - sem_anal = self.manager.semantic_analyzer + sem_anal = self.semantic_analyzer() if p in sem_anal.modules: sem_anal.modules[p].names[c[-1]] = SymbolTableNode(MODULE_REF, tree, p) @@ -1137,8 +1137,8 @@ def process(self) -> None: self.type_checker().visit_file(self.tree, self.tree.path) if DUMP_INFER_STATS in self.manager.flags: stats.dump_type_stats(self.tree, self.tree.path, inferred=True, - typemap=self.manager.type_checker.type_map) - self.manager.reports.file(self.tree, type_map=self.manager.type_checker.type_map) + typemap=self.type_checker().type_map) + self.manager.reports.file(self.tree, type_map=self.type_checker().type_map) # FIX remove from active state list to speed up processing @@ -1356,7 +1356,7 @@ def find_cache_meta(id: str, path: str, manager: BuildManager) -> Optional[Cache return None with open(meta_json, 'r') as f: meta_str = f.read() - manager.log('Meta {} {}'.format(id, meta_str.rstrip())) + manager.trace('Meta {} {}'.format(id, meta_str.rstrip())) meta = json.loads(meta_str) # TODO: Errors if not isinstance(meta, dict): return None @@ -1384,7 +1384,7 @@ def find_cache_meta(id: str, path: str, manager: BuildManager) -> Optional[Cache # TODO: stat() errors if os.path.getmtime(data_json) != m.data_mtime: return None - manager.log('Found {} {} {}'.format(id, meta_json, m)) + manager.log('Found {} {}'.format(id, meta_json)) return m diff --git a/mypy/fixup.py b/mypy/fixup.py index 9109743a47a5..6a5aeb487851 100644 --- a/mypy/fixup.py +++ b/mypy/fixup.py @@ -180,14 +180,20 @@ def lookup_qualified(name: str, modules: Dict[str, MypyFile]) -> SymbolTableNode node = modules.get(parts[0]) if node is None: print('*** Cannot find module', parts[0]) + import pdb # type: ignore + pdb.set_trace() return None for i, part in enumerate(parts[1:-1], 1): # print(' Looking for submodule', part, 'of package', parts[:i]) if part not in node.names: 
print('*** Cannot find submodule', part, 'of package', parts[:i]) + import pdb # type: ignore + pdb.set_trace() return None if node.names[part].node is None: print('*** Weird!!!', part, 'exists in', parts[:i], 'but its node is None') + import pdb # type: ignore + pdb.set_trace() return None node = cast(MypyFile, node.names[part].node) assert isinstance(node, MypyFile), node @@ -195,4 +201,6 @@ def lookup_qualified(name: str, modules: Dict[str, MypyFile]) -> SymbolTableNode res = node.names.get(parts[-1]) if res is None: print('*** Cannot find', parts[-1], 'in module', parts[:-1]) + import pdb # type: ignore + pdb.set_trace() return res From 924b08fe72c3e657186299cc59ec24285dee7bc8 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 24 Feb 2016 19:17:02 -0800 Subject: [PATCH 020/117] Rewrite fixit.lookup_qualified(). Rewrite is_ready() for cache states to use transitive closure. --- mypy/build.py | 45 +++++++++++++++++++++++++++++++++++++++++++++ mypy/fixup.py | 44 +++++++++++++++++++------------------------- 2 files changed, 64 insertions(+), 25 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index 46cdea670b66..0a20050c4f0c 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -469,6 +469,9 @@ def process(self, initial_states: List['UnprocessedBase']) -> BuildResult: # If there were no errors, all files should have been fully processed. for s in self.states: + if s.state() != FINAL_STATE: + import pdb # type: ignore + pdb.set_trace() assert s.state() == FINAL_STATE, ( '{} still unprocessed in state {}'.format(s.path, s.state())) @@ -830,6 +833,20 @@ def __init__(self, info: StateInfo, meta: CacheMeta) -> None: super().__init__(info) self.meta = meta + def is_ready(self) -> bool: + """Return True if all dependencies are at least in the same state + as this object (but not in the initial state), *and* the transitive + closure of dependencies is too. + """ + my_state = self.state() + # To avoid quadratic behavior of repeatedly calling module_state(), + # just loop once over all states. Note that is_dep() is heavily cached. + for state_obj in self.manager.states: + if self.manager.is_dep(self.id, state_obj.id): + if earlier_state(state_obj.state(), my_state): + return False + return True + def load_dependencies(self): deps = self.meta.dependencies[:] if self.id != 'builtins' and 'builtins' not in deps: @@ -914,6 +931,20 @@ def __init__(self, info: StateInfo, meta: CacheMeta, data: Any) -> None: # Store the parsed module in the shared module symbol table. self.semantic_analyzer().modules[self.id] = self.tree + def is_ready(self) -> bool: + """Return True if all dependencies are at least in the same state + as this object (but not in the initial state), *and* the transitive + closure of dependencies is too. + """ + my_state = self.state() + # To avoid quadratic behavior of repeatedly calling module_state(), + # just loop once over all states. Note that is_dep() is heavily cached. 
+ for state_obj in self.manager.states: + if self.manager.is_dep(self.id, state_obj.id): + if earlier_state(state_obj.state(), my_state): + return False + return True + def process(self) -> None: """Patch up cross-references and Transition to CachePatchedFile.""" print() # TODO : Reduce debug prints @@ -939,6 +970,20 @@ def __init__(self, info: StateInfo, tree: MypyFile, meta: CacheMeta) -> None: if dep_id not in self.dependencies: self.dependencies.append(dep_id) + def is_ready(self) -> bool: + """Return True if all dependencies are at least in the same state + as this object (but not in the initial state), *and* the transitive + closure of dependencies is too. + """ + my_state = self.state() + # To avoid quadratic behavior of repeatedly calling module_state(), + # just loop once over all states. Note that is_dep() is heavily cached. + for state_obj in self.manager.states: + if self.manager.is_dep(self.id, state_obj.id): + if earlier_state(state_obj.state(), my_state): + return False + return True + def process(self) -> None: """Calculate all MROs and transition to CacheWithMroFile.""" print() diff --git a/mypy/fixup.py b/mypy/fixup.py index 6a5aeb487851..9d2b90d0e38c 100644 --- a/mypy/fixup.py +++ b/mypy/fixup.py @@ -175,32 +175,26 @@ def visit_void(self, o: Any) -> None: def lookup_qualified(name: str, modules: Dict[str, MypyFile]) -> SymbolTableNode: - parts = name.split('.') # print(' Looking for module', parts) - node = modules.get(parts[0]) - if node is None: - print('*** Cannot find module', parts[0]) - import pdb # type: ignore - pdb.set_trace() - return None - for i, part in enumerate(parts[1:-1], 1): - # print(' Looking for submodule', part, 'of package', parts[:i]) - if part not in node.names: - print('*** Cannot find submodule', part, 'of package', parts[:i]) + head = name + rest = [] + while True: + head, tail = head.rsplit('.', 1) + mod = modules.get(head) + if mod is not None: + rest.append(tail) + break + names = mod.names + while True: + if not rest: + print('*** Cannot find', name) import pdb # type: ignore pdb.set_trace() return None - if node.names[part].node is None: - print('*** Weird!!!', part, 'exists in', parts[:i], 'but its node is None') - import pdb # type: ignore - pdb.set_trace() - return None - node = cast(MypyFile, node.names[part].node) - assert isinstance(node, MypyFile), node - # print(' Looking for', parts[-1], 'in module', parts[:-1]) - res = node.names.get(parts[-1]) - if res is None: - print('*** Cannot find', parts[-1], 'in module', parts[:-1]) - import pdb # type: ignore - pdb.set_trace() - return res + key = rest.pop() + stnode = names[key] + if not rest: + return stnode + node = stnode.node + assert isinstance(node, TypeInfo) + names = cast(TypeInfo, node).names From e7d5772b609054c9c4f6c376a13fd9c8089ab590 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Thu, 25 Feb 2016 10:50:37 -0800 Subject: [PATCH 021/117] Be less shy about the type of variables and bound_vars -- they are always lists. 
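Since variables and bound_vars are always lists, serialize() can emit them
unconditionally and deserialize() drops its None checks. For illustration
only (the key set is read off serialize() and may differ in detail; the
variable name serialized and the assumption ARG_POS == 0 are mine), the
cached form of a callable like "def f(x: int) -> str" comes out roughly as:

    serialized = {
        '.class': 'CallableType',
        'arg_types': [{'.class': 'Instance', 'type_ref': 'builtins.int'}],
        'arg_kinds': [0],      # assuming ARG_POS == 0
        'arg_names': ['x'],
        'ret_type': {'.class': 'Instance', 'type_ref': 'builtins.str'},
        'fallback': {'.class': 'Instance', 'type_ref': 'builtins.function'},
        'name': 'f',
        'variables': [],       # always a list now, never None
        'bound_vars': [],      # likewise
        'is_ellipsis_args': False,
    }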
--- mypy/types.py | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/mypy/types.py b/mypy/types.py index 6793ad9dce0f..4e3f64add8d8 100644 --- a/mypy/types.py +++ b/mypy/types.py @@ -237,7 +237,7 @@ def serialize(self) -> JsonDict: @classmethod def deserialize(self, data: JsonDict) -> 'DeletedType': assert data['.class'] == 'DeletedType' - return DeletedType(data.get('source')) + return DeletedType(data['source']) class Instance(Type): @@ -499,10 +499,8 @@ def serialize(self) -> JsonDict: 'ret_type': self.ret_type.serialize(), 'fallback': self.fallback.serialize(), 'name': self.name, - 'variables': (None if self.variables is None - else [v.serialize() for v in self.variables]), - 'bound_vars': (None if self.bound_vars is None - else [[x, y.serialize()] for x, y in self.bound_vars]), + 'variables': [v.serialize() for v in self.variables], + 'bound_vars': [[x, y.serialize()] for x, y in self.bound_vars], 'is_ellipsis_args': self.is_ellipsis_args, } @@ -516,13 +514,9 @@ def deserialize(cls, data: JsonDict) -> 'CallableType': data['arg_names'], Type.deserialize(data['ret_type']), Instance.deserialize(data['fallback']), - name=data.get('name'), - variables=(None if data.get('variables') is None - else [TypeVarDef.deserialize(v) - for v in data['variables']]), - bound_vars=(None if data.get('bound_vars') is None - else [(x, Type.deserialize(y)) - for x, y in data['bound_vars']]), + name=data['name'], + variables=[TypeVarDef.deserialize(v) for v in data['variables']], + bound_vars=[(x, Type.deserialize(y)) for x, y in data['bound_vars']], is_ellipsis_args=data['is_ellipsis_args'], ) From 2d1c3668f54e6afe593dbc6c961f6748348fc612 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Thu, 25 Feb 2016 10:52:47 -0800 Subject: [PATCH 022/117] Serialize decorators (at least simple ones). Explicitly ignore UNBOUND_IMPORTED. --- mypy/nodes.py | 45 +++++++++++++++++++++++++++++++-------------- 1 file changed, 31 insertions(+), 14 deletions(-) diff --git a/mypy/nodes.py b/mypy/nodes.py index 1c54797363e2..21cdc2dd2202 100644 --- a/mypy/nodes.py +++ b/mypy/nodes.py @@ -113,6 +113,20 @@ def get_line(self) -> int: def accept(self, visitor: NodeVisitor[T]) -> T: raise RuntimeError('Not implemented') + # @abstractmethod # TODO + def serialize(self) -> Any: + raise NotImplementedError('Cannot serialize {} instance'.format(self.__class__.__name__)) + + @classmethod + def deserialize(cls, data: JsonDict) -> 'Node': + classname = data['.class'] + glo = globals() + if classname in glo: + cl = glo[classname] + if issubclass(cl, cls) and 'deserialize' in cl.__dict__: + return cl.deserialize(data) + raise NotImplementedError('unexpected .class {}'.format(classname)) + class SymbolNode(Node): # Nodes that can be stored in a symbol table. 
@@ -125,19 +139,9 @@ def name(self) -> str: pass @abstractmethod def fullname(self) -> str: pass - # @abstractmethod # TODO - def serialize(self) -> Any: - raise NotImplementedError('Cannot serialize {} instance'.format(self.__class__.__name__)) - @classmethod def deserialize(cls, data: JsonDict) -> 'SymbolNode': - classname = data['.class'] - glo = globals() - if classname in glo: - cl = glo[classname] - if 'deserialize' in cl.__dict__: - return cl.deserialize(data) - raise NotImplementedError('unexpected .class {}'.format(classname)) + return cast(SymbolNode, super().deserialize(data)) class MypyFile(SymbolNode): @@ -499,7 +503,7 @@ def accept(self, visitor: NodeVisitor[T]) -> T: def serialize(self) -> JsonDict: return {'.class': 'Decorator', 'func': self.func.serialize(), - # TODO: 'decorators' + 'decorators': [d.serialize() for d in self.decorators], 'var': self.var.serialize(), 'is_overload': self.is_overload, } @@ -508,7 +512,7 @@ def serialize(self) -> JsonDict: def deserialize(cls, data: JsonDict) -> 'Decorator': assert data['.class'] == 'Decorator' dec = Decorator(FuncDef.deserialize(data['func']), - [], # TODO: decorators + [Node.deserialize(d) for d in data['decorators']], Var.deserialize(data['var'])) dec.is_overload = data['is_overload'] return dec @@ -1046,6 +1050,15 @@ def __init__(self, name: str) -> None: def accept(self, visitor: NodeVisitor[T]) -> T: return visitor.visit_name_expr(self) + def serialize(self) -> JsonDict: + # TODO: kind, node, fullname, is_def + return {'.class': 'NameExpr', 'name': self.name} + + @classmethod + def deserialize(cls, data: JsonDict) -> 'NameExpr': + assert data['.class'] == 'NameExpr' + return NameExpr(data['name']) + class MemberExpr(RefExpr): """Member access expression x.y""" @@ -1945,7 +1958,11 @@ def serialize(self) -> JsonDict: data['type'] = typ.serialize() elif isinstance(self.node, (Var, TypeVarExpr, OverloadedFuncDef, Decorator)): data['node'] = self.node.serialize() - # else? XXX + else: + if self.kind == UNBOUND_IMPORTED: + pass # TODO + else: + print('XXX Huhhhh?', self.__dict__) # type: ignore if len(data) == 2 and self.kind != UNBOUND_IMPORTED: print('An unsupported SymbolTableNode!') import pdb # type: ignore From dadb7e86760e0c4f738a52941d90466c457cf315 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Thu, 25 Feb 2016 10:54:04 -0800 Subject: [PATCH 023/117] Fix up ct.bound_vars. Missing arg types are okay. --- mypy/fixup.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/mypy/fixup.py b/mypy/fixup.py index 9d2b90d0e38c..4616a734f7f2 100644 --- a/mypy/fixup.py +++ b/mypy/fixup.py @@ -31,7 +31,7 @@ def compute_all_mros(symtab: SymbolTable, modules: Dict[str, MypyFile]) -> None: for key, value in symtab.items(): if value.kind in (LDEF, MDEF, GDEF) and isinstance(value.node, TypeInfo): info = value.node - print(' Calc MRO for', info.fullname()) + # print(' Calc MRO for', info.fullname()) info.calculate_mro() if not info.mro: print('*** No MRO calculated for', info.fullname()) @@ -51,6 +51,8 @@ def fixup_symbol_table(symtab: SymbolTable, modules: Dict[str, MypyFile], elif value.kind == MODULE_REF: if value.module_ref not in modules: print('*** Cannot find module', value.module_ref, 'needed for patch-up') + ## import pdb # type: ignore + ## pdb.set_trace() return value.node = modules[value.module_ref] # print('Fixed up module ref to', value.module_ref) @@ -129,15 +131,19 @@ def visit_any(self, o: Any) -> None: pass # Nothing to descend into. 
def visit_callable_type(self, ct: CallableType) -> None: + if ct.fallback: + ct.fallback.accept(self) if ct.arg_types: for argt in ct.arg_types: - if argt is None: - import pdb # type: ignore - pdb.set_trace() - argt.accept(self) + # TODO: When is argt None? Maybe when no type is specified? + if argt is not None: + argt.accept(self) if ct.ret_type is not None: ct.ret_type.accept(self) # TODO: What to do with ct.variables? + if ct.bound_vars: + for i, t in ct.bound_vars: + t.accept(self) def visit_deleted_type(self, o: Any) -> None: pass # Nothing to descend into. From 56dc1e262564b79777013118642586b7d63e5ff8 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Thu, 25 Feb 2016 14:19:51 -0800 Subject: [PATCH 024/117] Use log() instead of print() for "FIXING MODULE PASS N". --- mypy/build.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index 0a20050c4f0c..ba61f4a403bc 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -947,8 +947,7 @@ def is_ready(self) -> bool: def process(self) -> None: """Patch up cross-references and Transition to CachePatchedFile.""" - print() # TODO : Reduce debug prints - print('FIXING MODULE PASS ONE', self.id) + self.manager.log('FIXING MODULE PASS ONE {}'.format(self.id)) fixup.fixup_module_pass_one(self.tree, self.semantic_analyzer().modules) file = CachePatchedFile(self.info(), self.tree, self.meta) self.switch_state(file) @@ -986,8 +985,7 @@ def is_ready(self) -> bool: def process(self) -> None: """Calculate all MROs and transition to CacheWithMroFile.""" - print() - print('FIXING MODULE PASS TWO', self.id) + self.manager.log('FIXING MODULE PASS TWO {}'.format(self.id)) fixup.fixup_module_pass_two(self.tree, self.semantic_analyzer().modules) file = CacheWithMroFile(self.info(), self.tree, self.meta) self.switch_state(file) From 080b498cb3a51e5bbd760e7f107778cdfcd692f9 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 26 Feb 2016 10:25:56 -0800 Subject: [PATCH 025/117] Add visit decorator and overloaded. 
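Background: .info back-references to the enclosing class are restored by the
fixer while it walks each TypeInfo's symbol table, and that now covers
decorated and overloaded methods as well. A small example of the kind of
class this matters for (illustrative source only, not part of the patch):

    from typing import overload

    class C:
        @property
        def size(self) -> int:
            return 0

        @overload
        def get(self, key: int) -> int: ...
        @overload
        def get(self, key: str) -> str: ...
        def get(self, key):
            return key

When C is loaded from cache, visit_decorator points the Decorator's Var back
at C's TypeInfo, and TypeFixer.visit_overloaded descends into each overload
item so the Instances in their signatures get their type_refs resolved.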
--- mypy/fixup.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/mypy/fixup.py b/mypy/fixup.py index 4616a734f7f2..c750cc20e8a1 100644 --- a/mypy/fixup.py +++ b/mypy/fixup.py @@ -9,22 +9,22 @@ from typing import Any, Dict, cast from mypy.nodes import (MypyFile, SymbolTable, SymbolTableNode, - TypeInfo, FuncDef, OverloadedFuncDef, Var, + TypeInfo, FuncDef, OverloadedFuncDef, Decorator, Var, LDEF, MDEF, GDEF, MODULE_REF) -from mypy.types import Instance, CallableType, TupleType, TypeVarType, UnionType, TypeVisitor +from mypy.types import Instance, CallableType, Overloaded, TupleType, TypeVarType, UnionType, TypeVisitor from mypy.visitor import NodeVisitor def fixup_module_pass_one(tree: MypyFile, modules: Dict[str, MypyFile]) -> None: assert modules[tree.fullname()] is tree fixup_symbol_table(tree.names, modules) - print('Done pass 1', tree.fullname()) + # print('Done pass 1', tree.fullname()) def fixup_module_pass_two(tree: MypyFile, modules: Dict[str, MypyFile]) -> None: assert modules[tree.fullname()] is tree compute_all_mros(tree.names, modules) - print('Done pass 2', tree.fullname()) + # print('Done pass 2', tree.fullname()) def compute_all_mros(symtab: SymbolTable, modules: Dict[str, MypyFile]) -> None: @@ -101,6 +101,10 @@ def visit_overloaded_func_def(self, func: OverloadedFuncDef) -> None: if self.current_info is not None: func.info = self.current_info + def visit_decorator(self, d: Decorator) -> None: + if self.current_info is not None: + d.var.info = self.current_info + def visit_var(self, v: Var) -> None: if self.current_info is not None: v.info = self.current_info @@ -145,6 +149,10 @@ def visit_callable_type(self, ct: CallableType) -> None: for i, t in ct.bound_vars: t.accept(self) + def visit_overloaded(self, t: Overloaded) -> None: + for ct in t.items(): + ct.accept(self) + def visit_deleted_type(self, o: Any) -> None: pass # Nothing to descend into. From 2ed9bb262fce53dbba208b64d066e41f600756ad Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Mon, 29 Feb 2016 12:12:19 -0800 Subject: [PATCH 026/117] Serialize UnboundType. 
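An UnboundType (an annotation not yet resolved by semantic analysis) can
still show up in a cached tree, so it needs a JSON form too; this pairs with
the fixup change in this patch that makes visit_unbound_type descend into
the args instead of raising. A quick round-trip sketch (illustrative only):

    from mypy.types import UnboundType

    t = UnboundType('List', [UnboundType('int')])
    data = t.serialize()
    # Roughly: {'.class': 'UnboundType', 'name': 'List',
    #           'args': [{'.class': 'UnboundType', 'name': 'int', 'args': []}]}
    t2 = UnboundType.deserialize(data)
    assert t2.name == 'List' and len(t2.args) == 1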
--- mypy/fixup.py | 8 +++++--- mypy/types.py | 12 ++++++++++++ 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/mypy/fixup.py b/mypy/fixup.py index c750cc20e8a1..563d01bef879 100644 --- a/mypy/fixup.py +++ b/mypy/fixup.py @@ -11,7 +11,8 @@ from mypy.nodes import (MypyFile, SymbolTable, SymbolTableNode, TypeInfo, FuncDef, OverloadedFuncDef, Decorator, Var, LDEF, MDEF, GDEF, MODULE_REF) -from mypy.types import Instance, CallableType, Overloaded, TupleType, TypeVarType, UnionType, TypeVisitor +from mypy.types import (Instance, CallableType, Overloaded, TupleType, + TypeVarType, UnboundType, UnionType, TypeVisitor) from mypy.visitor import NodeVisitor @@ -176,8 +177,9 @@ def visit_type_var(self, tvt: TypeVarType) -> None: if tvt.upper_bound is not None: tvt.upper_bound.accept(self) - def visit_unbound_type(self, o: Any) -> None: - raise RuntimeError("Shouldn't get here", o) + def visit_unbound_type(self, o: UnboundType) -> None: + for a in o.args: + a.accept(self) def visit_union_type(self, ut: UnionType) -> None: if ut.items: diff --git a/mypy/types.py b/mypy/types.py index 4e3f64add8d8..3b46adf5bc53 100644 --- a/mypy/types.py +++ b/mypy/types.py @@ -107,6 +107,18 @@ def __init__(self, name: str, args: List[Type] = None, line: int = -1) -> None: def accept(self, visitor: 'TypeVisitor[T]') -> T: return visitor.visit_unbound_type(self) + def serialize(self) -> JsonDict: + return {'.class': 'UnboundType', + 'name': self.name, + 'args': [a.serialize() for a in self.args], + } + + @classmethod + def deserialize(self, data: JsonDict) -> 'UnboundType': + assert data['.class'] == 'UnboundType' + return UnboundType(data['name'], + [Type.deserialize(a) for a in data['args']]) + class ErrorType(Type): """The error type is used as the result of failed type operations.""" From 93e14d1291ba70a866770d4b8a438ab3ad234718 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 1 Mar 2016 15:36:37 -0800 Subject: [PATCH 027/117] Prototype approach to class C(NamedTuple("C", ...)): ... --- mypy/build.py | 4 +- mypy/fixup.py | 164 ++++++++++++++++++++++++++++++++++++-------------- mypy/nodes.py | 5 ++ mypy/types.py | 2 +- 4 files changed, 129 insertions(+), 46 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index ba61f4a403bc..8a0f383237a0 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -1442,8 +1442,10 @@ def dump_to_json(file: TypeCheckedFile, manager: BuildManager) -> None: path = file.path if path == '': return + manager.log('Cleaning {}'.format(id)) + fixup.cleanup_module(file.tree, manager.semantic_analyzer.modules) path = os.path.abspath(path) - manager.log('Dumping {} {}'.format(id, path)) + manager.log('Dumping {} to {}'.format(id, path)) st = os.stat(path) # TODO: Errors mtime = st.st_mtime size = st.st_size diff --git a/mypy/fixup.py b/mypy/fixup.py index 563d01bef879..6c17ad45f7db 100644 --- a/mypy/fixup.py +++ b/mypy/fixup.py @@ -1,24 +1,28 @@ -"""Fix up various things after deserialization().""" +"""Fix up various things after deserialization. -# TODO: Handle import cycles better. Once several modules are all -# loaded, keep fixing them up until they are all fixed 100%. (This -# requires adding logic to build.py.) +Also clean up a few things before serialization. +""" -# TODO: Fix up info everywhere it occurs. 
+from typing import Any, Dict, Optional, cast -from typing import Any, Dict, cast - -from mypy.nodes import (MypyFile, SymbolTable, SymbolTableNode, +from mypy.nodes import (MypyFile, SymbolNode, SymbolTable, SymbolTableNode, TypeInfo, FuncDef, OverloadedFuncDef, Decorator, Var, LDEF, MDEF, GDEF, MODULE_REF) -from mypy.types import (Instance, CallableType, Overloaded, TupleType, - TypeVarType, UnboundType, UnionType, TypeVisitor) +from mypy.types import (CallableType, EllipsisType, Instance, Overloaded, TupleType, + TypeList, TypeVarType, UnboundType, UnionType, TypeVisitor) from mypy.visitor import NodeVisitor +def cleanup_module(tree: MypyFile, modules: Dict[str, MypyFile]) -> None: + print("Cleaning", tree.fullname()) + node_cleaner = NodeCleaner(modules) + node_cleaner.visit_symbol_table(tree.names) + + def fixup_module_pass_one(tree: MypyFile, modules: Dict[str, MypyFile]) -> None: assert modules[tree.fullname()] is tree - fixup_symbol_table(tree.names, modules) + node_fixer = NodeFixer(modules) + node_fixer.visit_symbol_table(tree.names) # print('Done pass 1', tree.fullname()) @@ -39,33 +43,15 @@ def compute_all_mros(symtab: SymbolTable, modules: Dict[str, MypyFile]) -> None: compute_all_mros(info.names, modules) -def fixup_symbol_table(symtab: SymbolTable, modules: Dict[str, MypyFile], - info: TypeInfo = None) -> None: - node_fixer = NodeFixer(modules, info) - for key, value in symtab.items(): - if value.kind in (LDEF, MDEF, GDEF): - if isinstance(value.node, TypeInfo): - # TypeInfo has no accept(). TODO: Add it? - node_fixer.visit_type_info(value.node) - elif value.node is not None: - value.node.accept(node_fixer) - elif value.kind == MODULE_REF: - if value.module_ref not in modules: - print('*** Cannot find module', value.module_ref, 'needed for patch-up') - ## import pdb # type: ignore - ## pdb.set_trace() - return - value.node = modules[value.module_ref] - # print('Fixed up module ref to', value.module_ref) - # TODO: Other kinds? - - -# TODO: FIx up .info when deserializing, i.e. much earlier. +# TODO: Fix up .info when deserializing, i.e. much earlier. class NodeFixer(NodeVisitor[None]): - def __init__(self, modules: Dict[str, MypyFile], info: TypeInfo = None) -> None: + current_info = None # type: Optional[TypeInfo] + + def __init__(self, modules: Dict[str, MypyFile], type_fixer: 'TypeFixer' = None) -> None: self.modules = modules - self.type_fixer = TypeFixer(self.modules) - self.current_info = info + if type_fixer is None: + type_fixer = TypeFixer(self.modules) + self.type_fixer = type_fixer # NOTE: This method isn't (yet) part of the NodeVisitor API. def visit_type_info(self, info: TypeInfo) -> None: @@ -74,7 +60,7 @@ def visit_type_info(self, info: TypeInfo) -> None: self.current_info = info # print('Descending into', info.fullname()) if info.names is not None: - fixup_symbol_table(info.names, self.modules, info) + self.visit_symbol_table(info.names) # print('Fixing up', info.fullname()) if info.subtypes is not None: for st in info.subtypes: @@ -89,6 +75,26 @@ def visit_type_info(self, info: TypeInfo) -> None: finally: self.current_info = save_info + # NOTE: This method *definitely* isn't part of the NodeVisitor API. + def visit_symbol_table(self, symtab: SymbolTable) -> None: + for key, value in list(symtab.items()): # TODO: Only use list() when cleaning. + if value.kind in (LDEF, MDEF, GDEF): + if isinstance(value.node, TypeInfo): + # TypeInfo has no accept(). TODO: Add it? 
+ self.visit_type_info(value.node) + elif value.node is not None: + value.node.accept(self) + elif value.kind == MODULE_REF: + self.visit_module_ref(value) + # TODO: Other kinds? + + # NOTE: Nor is this one. + def visit_module_ref(self, value: SymbolTableNode): + if value.module_ref not in self.modules: + print('*** Cannot find module', value.module_ref, 'needed for patch-up') + return + value.node = self.modules[value.module_ref] + def visit_func_def(self, func: FuncDef) -> None: if self.current_info is not None: func.info = self.current_info @@ -122,16 +128,16 @@ def visit_instance(self, inst: Instance) -> None: type_ref = inst.type_ref if type_ref is not None: del inst.type_ref - stnode =lookup_qualified(type_ref, self.modules) - if stnode is not None and isinstance(stnode.node, TypeInfo): - inst.type = stnode.node + node = lookup_qualified(self.modules, type_ref) + if isinstance(node, TypeInfo): + inst.type = node if inst.type.bases: + # TODO: Is this needed or redundant? # Also fix up the bases, just in case. for base in inst.type.bases: if base.type is None: base.accept(self) - def visit_any(self, o: Any) -> None: pass # Nothing to descend into. @@ -150,6 +156,9 @@ def visit_callable_type(self, ct: CallableType) -> None: for i, t in ct.bound_vars: t.accept(self) + def visit_ellipsis_type(self, e: EllipsisType) -> None: + pass # Nothing to descend into. + def visit_overloaded(self, t: Overloaded) -> None: for ct in t.items(): ct.accept(self) @@ -170,6 +179,10 @@ def visit_tuple_type(self, tt: TupleType) -> None: if tt.fallback is not None: tt.fallback.accept(self) + def visit_type_list(self, tl: TypeList) -> None: + for t in tl.items: + t.accept(self) + def visit_type_var(self, tvt: TypeVarType) -> None: if tvt.values: for vt in tvt.values: @@ -190,8 +203,30 @@ def visit_void(self, o: Any) -> None: pass # Nothing to descend into. 
-def lookup_qualified(name: str, modules: Dict[str, MypyFile]) -> SymbolTableNode: - # print(' Looking for module', parts) +class TypeCleaner(TypeFixer): + counter = 0 + + def visit_instance(self, inst: Instance) -> None: + info = inst.type + if info.alt_fullname is not None: + return + if lookup_qualified(self.modules, info.fullname()) is info: + return + self.counter += 1 + info.alt_fullname = info.fullname() + '$' + str(self.counter) + print("Set alt_fullname for", info.alt_fullname) + store_qualified(self.modules, info.alt_fullname, info) + + +class NodeCleaner(NodeFixer): + def __init__(self, modules: Dict[str, MypyFile]) -> None: + super().__init__(modules, TypeCleaner(modules)) + + def visit_module_ref(self, value: SymbolTableNode) -> None: + assert value.kind == MODULE_REF + + +def lookup_qualified(modules: Dict[str, MypyFile], name: str) -> SymbolNode: head = name rest = [] while True: @@ -208,9 +243,50 @@ def lookup_qualified(name: str, modules: Dict[str, MypyFile]) -> SymbolTableNode pdb.set_trace() return None key = rest.pop() + if key not in names: + print('*** Cannot find', key, 'for', name) + return None stnode = names[key] + node = stnode.node if not rest: - return stnode + return node + assert isinstance(node, TypeInfo) + names = cast(TypeInfo, node).names + + +def store_qualified(modules: Dict[str, MypyFile], name: str, info: SymbolNode) -> None: + print("store_qualified", name, repr(info)) + head = name + rest = [] + while True: + head, tail = head.rsplit('.', 1) + mod = modules.get(head) + if mod is not None: + rest.append(tail) + break + names = mod.names + while True: + if not rest: + print('*** Cannot find', name) + import pdb # type: ignore + pdb.set_trace() + return + key = rest.pop() + if key not in names: + if rest: + print('*** Cannot find', key, 'for', name) + return + # Store it. + # TODO: kind might be something else? + names[key] = SymbolTableNode(GDEF, info) + print('Stored', names[key]) + return + stnode = names[key] node = stnode.node + if not rest: + print('*** Overwriting!', name, stnode) + stnode.node = info + return assert isinstance(node, TypeInfo) names = cast(TypeInfo, node).names + diff --git a/mypy/nodes.py b/mypy/nodes.py index 21cdc2dd2202..253cb7b8fa76 100644 --- a/mypy/nodes.py +++ b/mypy/nodes.py @@ -1689,6 +1689,9 @@ class is generic then it will be a type constructor of higher kind. # Is this a dummy from deserialization? is_dummy = False + # Alternative to fullname() for 'anonymous' classes. 
+ alt_fullname = None # type: Optional[str] + def __init__(self, names: 'SymbolTable', defn: ClassDef) -> None: """Initialize a TypeInfo.""" self.names = names @@ -1832,6 +1835,7 @@ def __str__(self) -> str: def serialize(self) -> Union[str, JsonDict]: data = {'.class': 'TypeInfo', 'fullname': self.fullname(), + 'alt_fullname': self.alt_fullname, 'subtypes': [t.serialize() for t in self.subtypes], 'names': self.names.serialize(), 'defn': self.defn.serialize(), @@ -1853,6 +1857,7 @@ def deserialize(cls, data: JsonDict) -> 'TypeInfo': defn = ClassDef.deserialize(data['defn']) ti = TypeInfo(names, defn) ti._fullname = data['fullname'] + ti.alt_fullname = data['alt_fullname'] ti.subtypes = {TypeInfo.deserialize(t) for t in data['subtypes']} ti.is_abstract = data['is_abstract'] ti.abstract_attributes = data['abstract_attributes'] diff --git a/mypy/types.py b/mypy/types.py index 3b46adf5bc53..42a5950927a3 100644 --- a/mypy/types.py +++ b/mypy/types.py @@ -278,7 +278,7 @@ def serialize(self) -> JsonDict: data = {'.class': 'Instance', } # type: JsonDict if self.type is not None: - data['type_ref'] = self.type.fullname() + data['type_ref'] = self.type.alt_fullname or self.type.fullname() if self.args: data['args'] = [arg.serialize() for arg in self.args] if self.erased: From adf7d6e14432874175b53dd021ce7b5d05a703fe Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 2 Mar 2016 10:55:10 -0800 Subject: [PATCH 028/117] Make the type fixer/cleaner recurse into Instance.args. --- mypy/fixup.py | 41 ++++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/mypy/fixup.py b/mypy/fixup.py index 6c17ad45f7db..d9feb4359c06 100644 --- a/mypy/fixup.py +++ b/mypy/fixup.py @@ -14,7 +14,7 @@ def cleanup_module(tree: MypyFile, modules: Dict[str, MypyFile]) -> None: - print("Cleaning", tree.fullname()) + # print("Cleaning", tree.fullname()) node_cleaner = NodeCleaner(modules) node_cleaner.visit_symbol_table(tree.names) @@ -126,17 +126,19 @@ def __init__(self, modules: Dict[str, MypyFile]) -> None: def visit_instance(self, inst: Instance) -> None: # TODO: Combine Instances that are exactly the same? type_ref = inst.type_ref - if type_ref is not None: - del inst.type_ref - node = lookup_qualified(self.modules, type_ref) - if isinstance(node, TypeInfo): - inst.type = node - if inst.type.bases: - # TODO: Is this needed or redundant? - # Also fix up the bases, just in case. - for base in inst.type.bases: - if base.type is None: - base.accept(self) + if type_ref is None: + return # We've already been here. + del inst.type_ref + node = lookup_qualified(self.modules, type_ref) + if isinstance(node, TypeInfo): + inst.type = node + # TODO: Is this needed or redundant? + # Also fix up the bases, just in case. + for base in inst.type.bases: + if base.type is None: + base.accept(self) + for a in inst.args: + a.accept(self) def visit_any(self, o: Any) -> None: pass # Nothing to descend into. 
@@ -209,13 +211,14 @@ class TypeCleaner(TypeFixer): def visit_instance(self, inst: Instance) -> None: info = inst.type if info.alt_fullname is not None: - return - if lookup_qualified(self.modules, info.fullname()) is info: - return - self.counter += 1 - info.alt_fullname = info.fullname() + '$' + str(self.counter) - print("Set alt_fullname for", info.alt_fullname) - store_qualified(self.modules, info.alt_fullname, info) + return # We've already been here + if lookup_qualified(self.modules, info.fullname()) is not info: + self.counter += 1 + info.alt_fullname = info.fullname() + '$' + str(self.counter) + print("Set alt_fullname for", info.alt_fullname) + store_qualified(self.modules, info.alt_fullname, info) + for a in inst.args: + a.accept(self) class NodeCleaner(NodeFixer): From 92ecd7ed9cc74aa0aa8bf804d5c425fd263f22cf Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 2 Mar 2016 16:36:23 -0800 Subject: [PATCH 029/117] Serialize TypeList and EllipsisType. --- mypy/types.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/mypy/types.py b/mypy/types.py index 42a5950927a3..2aa8bfc190b2 100644 --- a/mypy/types.py +++ b/mypy/types.py @@ -144,6 +144,16 @@ def __init__(self, items: List[Type], line: int = -1) -> None: def accept(self, visitor: 'TypeVisitor[T]') -> T: return visitor.visit_type_list(self) + def serialize(self) -> JsonDict: + return {'.class': 'TypeList', + 'items': [t.serialize() for t in self.items], + } + + @classmethod + def deserialize(self, data: JsonDict) -> 'TypeList': + assert data['.class'] == 'TypeList' + return TypeList([Type.deserialize(t) for t in data['items']]) + class AnyType(Type): """The type 'Any'.""" @@ -750,6 +760,14 @@ class EllipsisType(Type): def accept(self, visitor: 'TypeVisitor[T]') -> T: return visitor.visit_ellipsis_type(self) + def serialize(self) -> JsonDict: + return {'.class': 'EllipsisType'} + + @classmethod + def deserialize(self, data: JsonDict) -> 'EllipsisType': + assert data['.class'] == 'EllipsisType' + return EllipsisType() + # # Visitor-related classes From 27546953a0c8b842a91ce4a566737f3b73290893 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 2 Mar 2016 16:36:38 -0800 Subject: [PATCH 030/117] Serialize Argument.type_annotation. --- mypy/nodes.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/mypy/nodes.py b/mypy/nodes.py index 253cb7b8fa76..3b6b682f1df3 100644 --- a/mypy/nodes.py +++ b/mypy/nodes.py @@ -366,19 +366,22 @@ def set_line(self, target: Union[Token, Node, int]) -> Node: self.initialization_statement.lvalues[0].set_line(self.line) def serialize(self) -> JsonDict: - data = {'.class': 'Argument'} # type: JsonDict - data['variable'] = self.variable.serialize() - # TODO: type_annotation - # TODO: initializer - data['kind'] = self.kind + data = {'.class': 'Argument', + 'kind': self.kind, + 'variable': self.variable.serialize(), + 'type_annotation': (None if self.type_annotation is None + else self.type_annotation.serialize()), + } # type: JsonDict + # TODO: initializer? return data @classmethod def deserialize(cls, data: JsonDict) -> 'Argument': assert data['.class'] == 'Argument' return Argument(Var.deserialize(data['variable']), - None, # TODO: type_annotation - None, # TODO: initializer + (None if data.get('type_annotation') is None + else mypy.types.Type.deserialize(data['type_annotation'])), + None, # TODO: initializer? 
kind=data['kind']) From f00e8e052ae91829a22fcfb6a58abc319dca52cb Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 2 Mar 2016 18:07:46 -0800 Subject: [PATCH 031/117] Fix up SymbolNode.type. --- mypy/fixup.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mypy/fixup.py b/mypy/fixup.py index d9feb4359c06..fdc4c3931d5c 100644 --- a/mypy/fixup.py +++ b/mypy/fixup.py @@ -84,6 +84,8 @@ def visit_symbol_table(self, symtab: SymbolTable) -> None: self.visit_type_info(value.node) elif value.node is not None: value.node.accept(self) + if value.type is not None: + value.type.accept(self.type_fixer) elif value.kind == MODULE_REF: self.visit_module_ref(value) # TODO: Other kinds? @@ -227,6 +229,7 @@ def __init__(self, modules: Dict[str, MypyFile]) -> None: def visit_module_ref(self, value: SymbolTableNode) -> None: assert value.kind == MODULE_REF + # TODO: Now what? def lookup_qualified(modules: Dict[str, MypyFile], name: str) -> SymbolNode: From 5890f671ebde5edb0df2de074b040ddb395c6108 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 2 Mar 2016 18:08:23 -0800 Subject: [PATCH 032/117] Serialize implicit flag. Reformat CallableType a bit. --- mypy/types.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/mypy/types.py b/mypy/types.py index 2aa8bfc190b2..b867a572e012 100644 --- a/mypy/types.py +++ b/mypy/types.py @@ -387,9 +387,10 @@ class CallableType(FunctionLike): arg_types = None # type: List[Type] # Types of function arguments arg_kinds = None # type: List[int] # mypy.nodes.ARG_ constants arg_names = None # type: List[str] # None if not a keyword argument - min_args = 0 # Minimum number of arguments - is_var_arg = False # Is it a varargs function? - ret_type = None # type:Type # Return value type + min_args = 0 # Minimum number of arguments; derived from arg_kinds + is_var_arg = False # Is it a varargs function? derived from arg_kinds + ret_type = None # type: Type # Return value type + fallback = None # type: Instance name = '' # Name (may be None; for error messages) definition = None # type: SymbolNode # For error messages. May be None. # Type variables for a generic function @@ -415,7 +416,8 @@ class CallableType(FunctionLike): # Was this type implicitly generated instead of explicitly specified by the user? implicit = False - def __init__(self, arg_types: List[Type], + def __init__(self, + arg_types: List[Type], arg_kinds: List[int], arg_names: List[str], ret_type: Type, @@ -521,9 +523,11 @@ def serialize(self) -> JsonDict: 'ret_type': self.ret_type.serialize(), 'fallback': self.fallback.serialize(), 'name': self.name, + # We don't serialize the definition (only used for error messages). 'variables': [v.serialize() for v in self.variables], 'bound_vars': [[x, y.serialize()] for x, y in self.bound_vars], 'is_ellipsis_args': self.is_ellipsis_args, + 'implicit': self.implicit, } @classmethod @@ -540,6 +544,7 @@ def deserialize(cls, data: JsonDict) -> 'CallableType': variables=[TypeVarDef.deserialize(v) for v in data['variables']], bound_vars=[(x, Type.deserialize(y)) for x, y in data['bound_vars']], is_ellipsis_args=data['is_ellipsis_args'], + implicit=data['implicit'], ) From 95e044991da7d16d3a57cfcd8a146b8604936aa6 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Thu, 3 Mar 2016 08:32:00 -0800 Subject: [PATCH 033/117] Must visit OverloadedFuncDef.type. Clean up visit_callable_types(). 
--- mypy/fixup.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/mypy/fixup.py b/mypy/fixup.py index fdc4c3931d5c..b1a44a59ea6e 100644 --- a/mypy/fixup.py +++ b/mypy/fixup.py @@ -109,6 +109,8 @@ def visit_func_def(self, func: FuncDef) -> None: def visit_overloaded_func_def(self, func: OverloadedFuncDef) -> None: if self.current_info is not None: func.info = self.current_info + if func.type: + func.type.accept(self.type_fixer) def visit_decorator(self, d: Decorator) -> None: if self.current_info is not None: @@ -148,17 +150,15 @@ def visit_any(self, o: Any) -> None: def visit_callable_type(self, ct: CallableType) -> None: if ct.fallback: ct.fallback.accept(self) - if ct.arg_types: - for argt in ct.arg_types: - # TODO: When is argt None? Maybe when no type is specified? - if argt is not None: - argt.accept(self) + for argt in ct.arg_types: + # TODO: When is argt None? Maybe when no type is specified? + if argt is not None: + argt.accept(self) if ct.ret_type is not None: ct.ret_type.accept(self) # TODO: What to do with ct.variables? - if ct.bound_vars: - for i, t in ct.bound_vars: - t.accept(self) + for i, t in ct.bound_vars: + t.accept(self) def visit_ellipsis_type(self, e: EllipsisType) -> None: pass # Nothing to descend into. From dfa9a652b621f97d40440dd1b96007fe0ce1986a Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Thu, 3 Mar 2016 08:52:26 -0800 Subject: [PATCH 034/117] Visit contents of CallableType.variables. --- mypy/fixup.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/mypy/fixup.py b/mypy/fixup.py index b1a44a59ea6e..ffa6aeb23414 100644 --- a/mypy/fixup.py +++ b/mypy/fixup.py @@ -151,12 +151,15 @@ def visit_callable_type(self, ct: CallableType) -> None: if ct.fallback: ct.fallback.accept(self) for argt in ct.arg_types: - # TODO: When is argt None? Maybe when no type is specified? + # argt may be None, e.g. for __self in NamedTuple constructors. if argt is not None: argt.accept(self) if ct.ret_type is not None: ct.ret_type.accept(self) - # TODO: What to do with ct.variables? + for v in ct.variables: + for val in v.values: + val.accept(self) + v.upper_bound.accept(self) for i, t in ct.bound_vars: t.accept(self) From a5ac6bb38130bf00308d3f409b99f6605f3e7e5a Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 4 Mar 2016 09:51:05 -0800 Subject: [PATCH 035/117] Serialize complete Var. --- mypy/nodes.py | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/mypy/nodes.py b/mypy/nodes.py index 3b6b682f1df3..60b060896733 100644 --- a/mypy/nodes.py +++ b/mypy/nodes.py @@ -558,24 +558,34 @@ def accept(self, visitor: NodeVisitor[T]) -> T: return visitor.visit_var(self) def serialize(self) -> JsonDict: + # TODO: Leave default values out? data = {'.class': 'Var', 'name': self._name, + 'fullname': self._fullname, + 'type': None if self.type is None else self.type.serialize(), + 'is_self': self.is_self, + 'is_ready': self.is_ready, # TODO: is this needed? 
+ 'is_initialized_in_class': self.is_initialized_in_class, + 'is_staticmethod': self.is_staticmethod, + 'is_classmethod': self.is_classmethod, + 'is_property': self.is_property, + 'is_settable_property': self.is_settable_property, } # type: JsonDict - if self._fullname is not None: - data['fullname'] = self._fullname - if self.type is not None: - data['type'] = self.type.serialize() return data @classmethod def deserialize(cls, data: JsonDict) -> 'Var': assert data['.class'] == 'Var' name = data['name'] - type = None - if 'type' in data: - type = mypy.types.Type.deserialize(data['type']) + type = None if data['type'] is None else mypy.types.Type.deserialize(data['type']) v = Var(name, type) - v._fullname = data.get('fullname') + v._fullname = data['fullname'] + v.is_self = data['is_self'] + v.is_initialized_in_class = data['is_initialized_in_class'] + v.is_staticmethod = data['is_staticmethod'] + v.is_classmethod = data['is_classmethod'] + v.is_property = data['is_property'] + v.is_settable_property = data['is_settable_property'] return v From baddc7566289e3631d9568015632b162ed16e002 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 4 Mar 2016 11:21:42 -0800 Subject: [PATCH 036/117] Serialize all of NameExpr. --- mypy/nodes.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/mypy/nodes.py b/mypy/nodes.py index 60b060896733..6b024efe07ec 100644 --- a/mypy/nodes.py +++ b/mypy/nodes.py @@ -1064,13 +1064,25 @@ def accept(self, visitor: NodeVisitor[T]) -> T: return visitor.visit_name_expr(self) def serialize(self) -> JsonDict: - # TODO: kind, node, fullname, is_def - return {'.class': 'NameExpr', 'name': self.name} + return {'.class': 'NameExpr', + 'kind': self.kind, + 'node': None if self.node is None else self.node.serialize(), + 'fullname': self.fullname, + 'is_def': self.is_def, + 'name': self.name, + 'literal': self.literal, + } @classmethod def deserialize(cls, data: JsonDict) -> 'NameExpr': assert data['.class'] == 'NameExpr' - return NameExpr(data['name']) + ret = NameExpr(data['name']) + ret.kind = data['kind'] + ret.node = None if data['node'] is None else Node.deserialize(data['node']) + ret.fullname = data['fullname'] + ret.is_def = data['is_def'] + ret.literal = data['literal'] + return ret class MemberExpr(RefExpr): From 4503c8bdf31dbef3ca21e2179866954647e4a67e Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 4 Mar 2016 11:55:13 -0800 Subject: [PATCH 037/117] Serialize TupleType.implicit. --- mypy/types.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mypy/types.py b/mypy/types.py index b867a572e012..e78105aa1090 100644 --- a/mypy/types.py +++ b/mypy/types.py @@ -632,14 +632,15 @@ def serialize(self) -> JsonDict: return {'.class': 'TupleType', 'items': [t.serialize() for t in self.items], 'fallback': self.fallback.serialize(), - # TODO: implicit + 'implicit': self.implicit, } @classmethod def deserialize(cls, data: JsonDict) -> 'TupleType': assert data['.class'] == 'TupleType' return TupleType([Type.deserialize(t) for t in data['items']], - Instance.deserialize(data['fallback'])) + Instance.deserialize(data['fallback']), + implicit=data['implicit']) class StarType(Type): From 0bf01878b2ab35cf5c28ade7cbd5a031e36a0468 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 4 Mar 2016 11:55:34 -0800 Subject: [PATCH 038/117] Serialize (mostly) complete ClassDef. 
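All of these nodes round-trip through the same JSON convention: serialize() emits a dict
tagged with a '.class' key, and the matching deserialize() asserts that tag before
rebuilding the object.  A rough, self-contained sketch of the pattern (toy class, not
one of the real mypy nodes):

    import json
    from typing import Any, Dict

    JsonDict = Dict[str, Any]

    class ToyVar:
        # Stand-in for a mypy node; only the shape of the protocol is the point.
        def __init__(self, name: str) -> None:
            self.name = name

        def serialize(self) -> JsonDict:
            return {'.class': 'ToyVar', 'name': self.name}

        @classmethod
        def deserialize(cls, data: JsonDict) -> 'ToyVar':
            assert data['.class'] == 'ToyVar'
            return cls(data['name'])

    blob = json.dumps(ToyVar('x').serialize())
    restored = ToyVar.deserialize(json.loads(blob))
    assert restored.name == 'x'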
--- mypy/nodes.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/mypy/nodes.py b/mypy/nodes.py index 6b024efe07ec..335723bd021d 100644 --- a/mypy/nodes.py +++ b/mypy/nodes.py @@ -606,16 +606,16 @@ class ClassDef(Node): # Built-in/extension class? (single implementation inheritance only) is_builtinclass = False - def __init__(self, name: str, defs: 'Block', + def __init__(self, + name: str, + defs: 'Block', type_vars: List['mypy.types.TypeVarDef'] = None, base_type_exprs: List[Node] = None, metaclass: str = None) -> None: - if not base_type_exprs: - base_type_exprs = [] self.name = name self.defs = defs self.type_vars = type_vars or [] - self.base_type_exprs = base_type_exprs + self.base_type_exprs = base_type_exprs or [] self.base_types = [] # Not yet semantically analyzed --> don't know base types self.metaclass = metaclass self.decorators = [] @@ -627,13 +627,14 @@ def is_generic(self) -> bool: return self.info.is_generic() def serialize(self) -> JsonDict: + # Not serialized: defs, base_type_exprs return {'.class': 'ClassDef', 'name': self.name, 'fullname': self.fullname, 'type_vars': [v.serialize() for v in self.type_vars], - # TODO: base_types? + 'base_types': [t.serialize() for t in self.base_types], 'metaclass': self.metaclass, - # TODO: decorators? + 'decorators': [d.serialize() for d in self.decorators], 'is_builtinclass': self.is_builtinclass, } @@ -643,11 +644,11 @@ def deserialize(self, data: JsonDict) -> 'ClassDef': res = ClassDef(data['name'], Block([]), [mypy.types.TypeVarDef.deserialize(v) for v in data['type_vars']], - # TODO: base_types? metaclass=data['metaclass'], - # TODO: decorators? ) res.fullname = data['fullname'] + res.base_types = [mypy.types.Instance.deserialize(t) for t in data['base_types']] + res.decorators = [Node.deserialize(d) for d in data['decorators']] res.is_builtinclass = data['is_builtinclass'] return res From 0e4bebb8f43e4af6f57a032e50b8cfbc944f746a Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 4 Mar 2016 12:52:45 -0800 Subject: [PATCH 039/117] Serialize FuncDef._fullname. --- mypy/nodes.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/mypy/nodes.py b/mypy/nodes.py index 335723bd021d..ea1b7139d646 100644 --- a/mypy/nodes.py +++ b/mypy/nodes.py @@ -460,6 +460,7 @@ def is_constructor(self) -> bool: def serialize(self) -> JsonDict: return {'.class': 'FuncDef', 'name': self._name, + 'fullname': self._fullname, 'arguments': [a.serialize() for a in self.arguments], 'type': None if self.type is None else self.type.serialize(), } @@ -468,12 +469,13 @@ def serialize(self) -> JsonDict: def deserialize(cls, data: JsonDict) -> 'FuncDef': assert data['.class'] == 'FuncDef' body = Block([]) - return FuncDef(data['name'], - [Argument.deserialize(a) for a in data['arguments']], - body, - (None if data['type'] is None - else mypy.types.FunctionLike.deserialize(data['type'])), - ) + ret = FuncDef(data['name'], + [Argument.deserialize(a) for a in data['arguments']], + body, + (None if data['type'] is None + else mypy.types.FunctionLike.deserialize(data['type']))) + ret._fullname = data['fullname'] + return ret class Decorator(SymbolNode): From b82de11267fc4b3579bbb11c5967c5943b4d7668 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 4 Mar 2016 18:53:40 -0800 Subject: [PATCH 040/117] Serialize FuncDef.is_class. 
--- mypy/nodes.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mypy/nodes.py b/mypy/nodes.py index ea1b7139d646..69ff3ce3de4b 100644 --- a/mypy/nodes.py +++ b/mypy/nodes.py @@ -463,6 +463,8 @@ def serialize(self) -> JsonDict: 'fullname': self._fullname, 'arguments': [a.serialize() for a in self.arguments], 'type': None if self.type is None else self.type.serialize(), + 'is_class': self.is_class, + # TODO: Various other flags } @classmethod @@ -475,6 +477,7 @@ def deserialize(cls, data: JsonDict) -> 'FuncDef': (None if data['type'] is None else mypy.types.FunctionLike.deserialize(data['type']))) ret._fullname = data['fullname'] + ret.is_class = data['is_class'] return ret From 3ef91f4d79e65916845db98c4a9486a0babef8ca Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Sun, 6 Mar 2016 11:12:18 -0800 Subject: [PATCH 041/117] Avoid crash in TypeCleaner.visit_instance() if inst.type is None. --- mypy/fixup.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/mypy/fixup.py b/mypy/fixup.py index ffa6aeb23414..e1b9726b9511 100644 --- a/mypy/fixup.py +++ b/mypy/fixup.py @@ -215,8 +215,8 @@ class TypeCleaner(TypeFixer): def visit_instance(self, inst: Instance) -> None: info = inst.type - if info.alt_fullname is not None: - return # We've already been here + if info is None or info.alt_fullname is not None: + return # Nothing here; or we've already been here if lookup_qualified(self.modules, info.fullname()) is not info: self.counter += 1 info.alt_fullname = info.fullname() + '$' + str(self.counter) @@ -298,4 +298,3 @@ def store_qualified(modules: Dict[str, MypyFile], name: str, info: SymbolNode) - return assert isinstance(node, TypeInfo) names = cast(TypeInfo, node).names - From ef061cf3915a5da2a68da0440ac1e9cc22e5d178 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Mon, 7 Mar 2016 12:15:49 -0800 Subject: [PATCH 042/117] Better approach to cross-references, to avoid copying imported definitions. 
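The scheme, in outline (a simplified sketch with made-up helper names; the real logic
is in SymbolTableNode.serialize() and the fixup pass below): when a symbol table entry
only re-exports something defined elsewhere, write just its fully qualified name as a
'cross_ref', and resolve that name against the loaded modules afterwards instead of
serializing a second copy of the definition.

    from typing import Any, Dict, Optional

    def serialize_entry(prefix: str, name: str, fullname: str,
                        payload: Dict[str, Any]) -> Dict[str, Any]:
        # Defined under some other name/module: store a reference, not a copy.
        if '.' in fullname and fullname != prefix + '.' + name:
            return {'cross_ref': fullname}
        return {'node': payload}

    def resolve_entry(data: Dict[str, Any], by_fullname: Dict[str, Any]) -> Optional[Any]:
        # Run after all modules are loaded, so the dotted name can be looked up.
        if 'cross_ref' in data:
            return by_fullname.get(data['cross_ref'])
        return data.get('node')

    # 'os.path' imported into module 'foo' serializes as a reference:
    assert serialize_entry('foo', 'path', 'os.path', {}) == {'cross_ref': 'os.path'}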
--- mypy/fixup.py | 85 +++++++++++++++++++++++++++++++++++---------------- mypy/nodes.py | 85 +++++++++++++++++++++++---------------------------- 2 files changed, 96 insertions(+), 74 deletions(-) diff --git a/mypy/fixup.py b/mypy/fixup.py index e1b9726b9511..78f1f7738d7f 100644 --- a/mypy/fixup.py +++ b/mypy/fixup.py @@ -7,6 +7,7 @@ from mypy.nodes import (MypyFile, SymbolNode, SymbolTable, SymbolTableNode, TypeInfo, FuncDef, OverloadedFuncDef, Decorator, Var, + TypeVarExpr, ClassDef, LDEF, MDEF, GDEF, MODULE_REF) from mypy.types import (CallableType, EllipsisType, Instance, Overloaded, TupleType, TypeList, TypeVarType, UnboundType, UnionType, TypeVisitor) @@ -37,7 +38,10 @@ def compute_all_mros(symtab: SymbolTable, modules: Dict[str, MypyFile]) -> None: if value.kind in (LDEF, MDEF, GDEF) and isinstance(value.node, TypeInfo): info = value.node # print(' Calc MRO for', info.fullname()) - info.calculate_mro() + try: + info.calculate_mro() + except Exception: + import pdb; pdb.set_trace() if not info.mro: print('*** No MRO calculated for', info.fullname()) compute_all_mros(info.names, modules) @@ -59,18 +63,20 @@ def visit_type_info(self, info: TypeInfo) -> None: try: self.current_info = info # print('Descending into', info.fullname()) - if info.names is not None: + if info.defn: + info.defn.accept(self) + if info.names: self.visit_symbol_table(info.names) # print('Fixing up', info.fullname()) - if info.subtypes is not None: + if info.subtypes: for st in info.subtypes: self.visit_type_info(st) - if info.bases is not None: + if info.bases: for base in info.bases: base.accept(self.type_fixer) - if info._promote is not None: + if info._promote: info._promote.accept(self.type_fixer) - if info.tuple_type is not None: + if info.tuple_type: info.tuple_type.accept(self.type_fixer) finally: self.current_info = save_info @@ -78,24 +84,26 @@ def visit_type_info(self, info: TypeInfo) -> None: # NOTE: This method *definitely* isn't part of the NodeVisitor API. def visit_symbol_table(self, symtab: SymbolTable) -> None: for key, value in list(symtab.items()): # TODO: Only use list() when cleaning. - if value.kind in (LDEF, MDEF, GDEF): + cross_ref = value.cross_ref + if cross_ref is not None: # Fix up cross-reference. + del value.cross_ref + if cross_ref in self.modules: + value.node = self.modules[cross_ref] + else: + stnode = lookup_qualified_stnode(self.modules, cross_ref) + if stnode is None: + print("*** Could not find cross-reference", cross_ref) + else: + value.node = stnode.node + value.type_override = stnode.type_override + else: if isinstance(value.node, TypeInfo): # TypeInfo has no accept(). TODO: Add it? self.visit_type_info(value.node) elif value.node is not None: value.node.accept(self) - if value.type is not None: - value.type.accept(self.type_fixer) - elif value.kind == MODULE_REF: - self.visit_module_ref(value) - # TODO: Other kinds? - - # NOTE: Nor is this one. 
- def visit_module_ref(self, value: SymbolTableNode): - if value.module_ref not in self.modules: - print('*** Cannot find module', value.module_ref, 'needed for patch-up') - return - value.node = self.modules[value.module_ref] + if value.type_override is not None: + value.type_override.accept(self.type_fixer) def visit_func_def(self, func: FuncDef) -> None: if self.current_info is not None: @@ -106,15 +114,32 @@ def visit_func_def(self, func: FuncDef) -> None: if arg.type_annotation is not None: arg.type_annotation.accept(self.type_fixer) - def visit_overloaded_func_def(self, func: OverloadedFuncDef) -> None: + def visit_overloaded_func_def(self, o: OverloadedFuncDef) -> None: if self.current_info is not None: - func.info = self.current_info - if func.type: - func.type.accept(self.type_fixer) + o.info = self.current_info + if o.type: + o.type.accept(self.type_fixer) + for item in o.items: + item.accept(self) def visit_decorator(self, d: Decorator) -> None: if self.current_info is not None: d.var.info = self.current_info + if d.func: + d.func.accept(self) + if d.var: + d.var.accept(self) + for node in d.decorators: + node.accept(self) + + def visit_class_def(self, c: ClassDef) -> None: + for v in c.type_vars: + for value in v.values: + value.accept(self.type_fixer) + + def visit_type_var_expr(self, tv: TypeVarExpr) -> None: + for value in tv.values: + value.accept(self.type_fixer) def visit_var(self, v: Var) -> None: if self.current_info is not None: @@ -236,6 +261,14 @@ def visit_module_ref(self, value: SymbolTableNode) -> None: def lookup_qualified(modules: Dict[str, MypyFile], name: str) -> SymbolNode: + stnode = lookup_qualified_stnode(modules, name) + if stnode is None: + return None + else: + return stnode.node + + +def lookup_qualified_stnode(modules: Dict[str, MypyFile], name: str) -> SymbolTableNode: head = name rest = [] while True: @@ -248,17 +281,15 @@ def lookup_qualified(modules: Dict[str, MypyFile], name: str) -> SymbolNode: while True: if not rest: print('*** Cannot find', name) - import pdb # type: ignore - pdb.set_trace() return None key = rest.pop() if key not in names: print('*** Cannot find', key, 'for', name) return None stnode = names[key] - node = stnode.node if not rest: - return node + return stnode + node = stnode.node assert isinstance(node, TypeInfo) names = cast(TypeInfo, node).names diff --git a/mypy/nodes.py b/mypy/nodes.py index 69ff3ce3de4b..4868c8e823e8 100644 --- a/mypy/nodes.py +++ b/mypy/nodes.py @@ -200,7 +200,7 @@ def serialize(self) -> JsonDict: return {'.class': 'MypyFile', '_name': self._name, '_fullname': self._fullname, - 'names': self.names.serialize(), + 'names': self.names.serialize(self._fullname), 'is_stub': self.is_stub, } @@ -438,7 +438,7 @@ class FuncDef(FuncItem): is_conditional = False # Defined conditionally (within block)? 
is_abstract = False is_property = False - original_def = None # type: Union[FuncDef, Var] # Original conditional definition + original_def = None # type: Union[None, FuncDef, Var] # Original conditional definition def __init__(self, name: str, # Function name @@ -488,7 +488,7 @@ class Decorator(SymbolNode): """ func = None # type: FuncDef # Decorated function - decorators = None # type: List[Node] # Decorators, at least one + decorators = None # type: List[Node] # Decorators, at least one # XXX Not true var = None # type: Var # Represents the decorated function obj is_overload = False @@ -1868,7 +1868,7 @@ def serialize(self) -> Union[str, JsonDict]: 'fullname': self.fullname(), 'alt_fullname': self.alt_fullname, 'subtypes': [t.serialize() for t in self.subtypes], - 'names': self.names.serialize(), + 'names': self.names.serialize(self.alt_fullname or self.fullname()), 'defn': self.defn.serialize(), 'is_abstract': self.is_abstract, 'abstract_attributes': self.abstract_attributes, @@ -1923,12 +1923,13 @@ class SymbolTableNode: # Module id (e.g. "foo.bar") or None mod_id = '' # If this not None, override the type of the 'node' attribute. - type_override = None # type: mypy.types.Type + type_override = None # type: Optional[mypy.types.Type] # If False, this name won't be imported via 'from import *'. # This has no effect on names within classes. module_public = True - # For deserialized MODULE_REF nodes, the referenced module name - module_ref = None # type: str + # For deserialized MODULE_REF nodes, the referenced module name; + # for other nodes, optionally the name of the referenced object. + cross_ref = None # type: Optional[str] def __init__(self, kind: int, node: Optional[SymbolNode], mod_id: str = None, typ: 'mypy.types.Type' = None, tvar_id: int = 0, @@ -1970,58 +1971,48 @@ def __str__(self) -> str: s += ' : {}'.format(self.type) return s - def serialize(self) -> JsonDict: + def serialize(self, prefix: str, name: str) -> JsonDict: data = {'.class': 'SymbolTableNode', 'kind': node_kinds[self.kind], } # type: JsonDict + if self.tvar_id: + data['tvar_id'] = self.tvar_id if self.kind == MODULE_REF: - data['module_ref'] = self.node.fullname() - elif self.kind == TYPE_ALIAS: - assert self.type_override is not None - assert self.node is not None - data['type'] = self.type_override.serialize() - data['node'] = self.node.serialize() + data['cross_ref'] = self.node.fullname() else: - if isinstance(self.node, TypeInfo): - data['node'] = self.node.serialize() - typ = self.type - if typ is not None: - print('XXX Huh?', typ, 'for', self.node._fullname) - elif isinstance(self.node, FuncDef): + if self.node is not None: + if prefix is not None: + if isinstance(self.node, TypeInfo): + fullname = self.node.alt_fullname or self.node.fullname() + else: + fullname = self.node.fullname() + if fullname is not None and fullname != prefix + '.' 
+ name: + data['cross_ref'] = fullname + return data data['node'] = self.node.serialize() - typ = self.type - if typ is not None: - data['type'] = typ.serialize() - elif isinstance(self.node, (Var, TypeVarExpr, OverloadedFuncDef, Decorator)): - data['node'] = self.node.serialize() - else: - if self.kind == UNBOUND_IMPORTED: - pass # TODO - else: - print('XXX Huhhhh?', self.__dict__) # type: ignore - if len(data) == 2 and self.kind != UNBOUND_IMPORTED: - print('An unsupported SymbolTableNode!') - import pdb # type: ignore - pdb.set_trace() + if self.type_override is not None: + data['type'] = self.type.serialize() return data @classmethod def deserialize(cls, data: JsonDict) -> 'SymbolTableNode': assert data['.class'] == 'SymbolTableNode' kind = inverse_node_kinds[data['kind']] - if kind == MODULE_REF: + if 'cross_ref' in data: # This needs to be fixed up in a later pass. stnode = SymbolTableNode(kind, None) - stnode.module_ref = data['module_ref'] - return stnode - typ = None - node = None - if 'type' in data: - typ = mypy.types.Type.deserialize(data['type']) - if 'node' in data: - node = SymbolNode.deserialize(data['node']) - # TODO: Rest - return SymbolTableNode(kind, node, typ=typ) + stnode.cross_ref = data['cross_ref'] + else: + node = None + if 'node' in data: + node = SymbolNode.deserialize(data['node']) + typ = None + if 'type' in data: + typ = mypy.types.Type.deserialize(data['type']) + stnode = SymbolTableNode(kind, node, typ=typ) + if 'tvar_id' in data: + stnode.tvar_id = data['tvar_id'] + return stnode class SymbolTable(Dict[str, SymbolTableNode]): @@ -2041,12 +2032,12 @@ def __str__(self) -> str: a[-1] += ')' return '\n'.join(a) - def serialize(self) -> JsonDict: + def serialize(self, fullname: str) -> JsonDict: data = {'.class': 'SymbolTable'} # type: JsonDict for key, value in self.items(): if key == '__builtins__' or not value.module_public: continue - data[key] = value.serialize() + data[key] = value.serialize(fullname, key) return data @classmethod From ec4e173ede671d80653d7362716cc7c51894e4a8 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 8 Mar 2016 09:07:00 -0800 Subject: [PATCH 043/117] Assign namedtuple a unique name from the start. Get rid of cleanup pass. --- mypy/build.py | 2 -- mypy/fixup.py | 39 +++------------------------------------ mypy/semanal.py | 5 ++++- 3 files changed, 7 insertions(+), 39 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index 8a0f383237a0..d2910564ed83 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -1442,8 +1442,6 @@ def dump_to_json(file: TypeCheckedFile, manager: BuildManager) -> None: path = file.path if path == '': return - manager.log('Cleaning {}'.format(id)) - fixup.cleanup_module(file.tree, manager.semantic_analyzer.modules) path = os.path.abspath(path) manager.log('Dumping {} to {}'.format(id, path)) st = os.stat(path) # TODO: Errors diff --git a/mypy/fixup.py b/mypy/fixup.py index 78f1f7738d7f..5b45433eed5e 100644 --- a/mypy/fixup.py +++ b/mypy/fixup.py @@ -1,7 +1,4 @@ -"""Fix up various things after deserialization. - -Also clean up a few things before serialization. 
-""" +"""Fix up various things after deserialization.""" from typing import Any, Dict, Optional, cast @@ -14,12 +11,6 @@ from mypy.visitor import NodeVisitor -def cleanup_module(tree: MypyFile, modules: Dict[str, MypyFile]) -> None: - # print("Cleaning", tree.fullname()) - node_cleaner = NodeCleaner(modules) - node_cleaner.visit_symbol_table(tree.names) - - def fixup_module_pass_one(tree: MypyFile, modules: Dict[str, MypyFile]) -> None: assert modules[tree.fullname()] is tree node_fixer = NodeFixer(modules) @@ -83,7 +74,8 @@ def visit_type_info(self, info: TypeInfo) -> None: # NOTE: This method *definitely* isn't part of the NodeVisitor API. def visit_symbol_table(self, symtab: SymbolTable) -> None: - for key, value in list(symtab.items()): # TODO: Only use list() when cleaning. + # Copy the items because we may mutate symtab. + for key, value in list(symtab.items()): cross_ref = value.cross_ref if cross_ref is not None: # Fix up cross-reference. del value.cross_ref @@ -235,31 +227,6 @@ def visit_void(self, o: Any) -> None: pass # Nothing to descend into. -class TypeCleaner(TypeFixer): - counter = 0 - - def visit_instance(self, inst: Instance) -> None: - info = inst.type - if info is None or info.alt_fullname is not None: - return # Nothing here; or we've already been here - if lookup_qualified(self.modules, info.fullname()) is not info: - self.counter += 1 - info.alt_fullname = info.fullname() + '$' + str(self.counter) - print("Set alt_fullname for", info.alt_fullname) - store_qualified(self.modules, info.alt_fullname, info) - for a in inst.args: - a.accept(self) - - -class NodeCleaner(NodeFixer): - def __init__(self, modules: Dict[str, MypyFile]) -> None: - super().__init__(modules, TypeCleaner(modules)) - - def visit_module_ref(self, value: SymbolTableNode) -> None: - assert value.kind == MODULE_REF - # TODO: Now what? - - def lookup_qualified(modules: Dict[str, MypyFile], name: str) -> SymbolNode: stnode = lookup_qualified_stnode(modules, name) if stnode is None: diff --git a/mypy/semanal.py b/mypy/semanal.py index 6ee3d40318ef..5ce162c87a4b 100644 --- a/mypy/semanal.py +++ b/mypy/semanal.py @@ -1356,8 +1356,11 @@ def check_namedtuple(self, node: Node) -> TypeInfo: # Error. Construct dummy return value. return self.build_namedtuple_typeinfo('namedtuple', [], []) else: - name = cast(StrExpr, call.args[0]).value + # Give it a unique name derived from the line number. + name = cast(StrExpr, call.args[0]).value + '@' + str(call.line) info = self.build_namedtuple_typeinfo(name, items, types) + # Store it as a global just in case it would remain anonymous. + self.globals[name] = SymbolTableNode(GDEF, info, self.cur_mod_id) call.analyzed = NamedTupleExpr(info).set_line(call.line) return info From 10a26a0c95254f33764a6458aacdcd14dab2ae8a Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 8 Mar 2016 11:06:02 -0800 Subject: [PATCH 044/117] Fix the post-serialization checking code. 
--- mypy/build.py | 2 +- mypy/fixup.py | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index d2910564ed83..84746a00d4ef 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -1487,7 +1487,7 @@ def dump_to_json(file: TypeCheckedFile, manager: BuildManager) -> None: new_keys = sorted(new_names) print('Fixing up', file.id) - fixup.fixup_symbol_table(new_names, file.semantic_analyzer().modules) + fixup.fixup_module_pass_one(new_tree, file.semantic_analyzer().modules) print('Comparing keys', file.id) old_tree = file.tree diff --git a/mypy/fixup.py b/mypy/fixup.py index 5b45433eed5e..322812b3d80e 100644 --- a/mypy/fixup.py +++ b/mypy/fixup.py @@ -12,14 +12,12 @@ def fixup_module_pass_one(tree: MypyFile, modules: Dict[str, MypyFile]) -> None: - assert modules[tree.fullname()] is tree node_fixer = NodeFixer(modules) node_fixer.visit_symbol_table(tree.names) # print('Done pass 1', tree.fullname()) def fixup_module_pass_two(tree: MypyFile, modules: Dict[str, MypyFile]) -> None: - assert modules[tree.fullname()] is tree compute_all_mros(tree.names, modules) # print('Done pass 2', tree.fullname()) From 55093dac9a66303be2a2a3ab5292c895b367cb42 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 8 Mar 2016 11:09:46 -0800 Subject: [PATCH 045/117] Fix flake8 errors. --- mypy/fixup.py | 3 ++- mypy/semanal.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/mypy/fixup.py b/mypy/fixup.py index 322812b3d80e..acd2363c576b 100644 --- a/mypy/fixup.py +++ b/mypy/fixup.py @@ -30,7 +30,8 @@ def compute_all_mros(symtab: SymbolTable, modules: Dict[str, MypyFile]) -> None: try: info.calculate_mro() except Exception: - import pdb; pdb.set_trace() + import pdb + pdb.set_trace() if not info.mro: print('*** No MRO calculated for', info.fullname()) compute_all_mros(info.names, modules) diff --git a/mypy/semanal.py b/mypy/semanal.py index 5ce162c87a4b..75cac6dc95bf 100644 --- a/mypy/semanal.py +++ b/mypy/semanal.py @@ -1357,7 +1357,7 @@ def check_namedtuple(self, node: Node) -> TypeInfo: return self.build_namedtuple_typeinfo('namedtuple', [], []) else: # Give it a unique name derived from the line number. - name = cast(StrExpr, call.args[0]).value + '@' + str(call.line) + name = cast(StrExpr, call.args[0]).value + '@' + str(call.line) info = self.build_namedtuple_typeinfo(name, items, types) # Store it as a global just in case it would remain anonymous. self.globals[name] = SymbolTableNode(GDEF, info, self.cur_mod_id) From 4d2a5a98ab30cb0cb5991057a9dc33d443f932dd Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 8 Mar 2016 11:26:48 -0800 Subject: [PATCH 046/117] Only give a NamedTuple a unique name if not in an assignment. Also fix tests. --- mypy/semanal.py | 15 ++++++++++----- mypy/test/data/semanal-namedtuple.test | 8 ++++---- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/mypy/semanal.py b/mypy/semanal.py index 75cac6dc95bf..5ba8bd5c6acd 100644 --- a/mypy/semanal.py +++ b/mypy/semanal.py @@ -1323,20 +1323,23 @@ def process_namedtuple_definition(self, s: AssignmentStmt) -> None: """Check if s defines a namedtuple; if yes, store the definition in symbol table.""" if len(s.lvalues) != 1 or not isinstance(s.lvalues[0], NameExpr): return - named_tuple = self.check_namedtuple(s.rvalue) + lvalue = cast(NameExpr, s.lvalues[0]) + name = lvalue.name + named_tuple = self.check_namedtuple(s.rvalue, name) if named_tuple is None: return # Yes, it's a valid namedtuple definition. Add it to the symbol table. 
- lvalue = cast(NameExpr, s.lvalues[0]) - name = lvalue.name node = self.lookup(name, s) node.kind = GDEF # TODO locally defined namedtuple # TODO call.analyzed node.node = named_tuple - def check_namedtuple(self, node: Node) -> TypeInfo: + def check_namedtuple(self, node: Node, var_name: str = None) -> TypeInfo: """Check if a call defines a namedtuple. + The optional var_name argument is the name of the variable to + which this is assigned, if any. + If it does, return the corresponding TypeInfo. Return None otherwise. If the definition is invalid but looks like a namedtuple, @@ -1357,7 +1360,9 @@ def check_namedtuple(self, node: Node) -> TypeInfo: return self.build_namedtuple_typeinfo('namedtuple', [], []) else: # Give it a unique name derived from the line number. - name = cast(StrExpr, call.args[0]).value + '@' + str(call.line) + name = cast(StrExpr, call.args[0]).value + if name != var_name: + name += '@' + str(call.line) info = self.build_namedtuple_typeinfo(name, items, types) # Store it as a global just in case it would remain anonymous. self.globals[name] = SymbolTableNode(GDEF, info, self.cur_mod_id) diff --git a/mypy/test/data/semanal-namedtuple.test b/mypy/test/data/semanal-namedtuple.test index de4968e67558..9ff9f6af6e8a 100644 --- a/mypy/test/data/semanal-namedtuple.test +++ b/mypy/test/data/semanal-namedtuple.test @@ -86,9 +86,9 @@ MypyFile:1( ClassDef:2( A TupleType( - Tuple[Any, fallback=__main__.N]) + Tuple[Any, fallback=__main__.N@2]) BaseType( - __main__.N) + __main__.N@2) PassStmt:2())) [case testNamedTupleBaseClassWithItemTypes] @@ -100,9 +100,9 @@ MypyFile:1( ClassDef:2( A TupleType( - Tuple[builtins.int, fallback=__main__.N]) + Tuple[builtins.int, fallback=__main__.N@2]) BaseType( - __main__.N) + __main__.N@2) PassStmt:2())) -- Errors From 1df40eabfc7462cdfc5bbd9c22823158254bade8 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 8 Mar 2016 11:53:00 -0800 Subject: [PATCH 047/117] A cross_ref must have a dotted name. --- mypy/nodes.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mypy/nodes.py b/mypy/nodes.py index 4868c8e823e8..f9a23975f861 100644 --- a/mypy/nodes.py +++ b/mypy/nodes.py @@ -1986,7 +1986,8 @@ def serialize(self, prefix: str, name: str) -> JsonDict: fullname = self.node.alt_fullname or self.node.fullname() else: fullname = self.node.fullname() - if fullname is not None and fullname != prefix + '.' + name: + if (fullname is not None and '.' in fullname and + fullname != prefix + '.' + name): data['cross_ref'] = fullname return data data['node'] = self.node.serialize() From f2076b8f97172b165ecf9fd23d76ff2357b963af Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 8 Mar 2016 13:26:12 -0800 Subject: [PATCH 048/117] Do not serialize Decorator.decorators. 
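(For reference, the namedtuple naming rule introduced above reduces to the following;
the helper name here is made up:)

    from typing import Optional

    def internal_namedtuple_name(declared: str, var_name: Optional[str], line: int) -> str:
        # Keep the declared name when it matches the assignment target;
        # otherwise derive a unique, serializable name from the line number.
        if declared == var_name:
            return declared
        return declared + '@' + str(line)

    assert internal_namedtuple_name('N', 'N', 2) == 'N'     # N = namedtuple('N', [...])
    assert internal_namedtuple_name('N', None, 2) == 'N@2'  # inline use, e.g. as a base class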
--- mypy/nodes.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mypy/nodes.py b/mypy/nodes.py index f9a23975f861..66bb86da8f05 100644 --- a/mypy/nodes.py +++ b/mypy/nodes.py @@ -511,7 +511,6 @@ def accept(self, visitor: NodeVisitor[T]) -> T: def serialize(self) -> JsonDict: return {'.class': 'Decorator', 'func': self.func.serialize(), - 'decorators': [d.serialize() for d in self.decorators], 'var': self.var.serialize(), 'is_overload': self.is_overload, } @@ -520,7 +519,7 @@ def serialize(self) -> JsonDict: def deserialize(cls, data: JsonDict) -> 'Decorator': assert data['.class'] == 'Decorator' dec = Decorator(FuncDef.deserialize(data['func']), - [Node.deserialize(d) for d in data['decorators']], + [], Var.deserialize(data['var'])) dec.is_overload = data['is_overload'] return dec From 526af54e66b2be8dddfc6c12e6809ba4053f0629 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 8 Mar 2016 13:28:30 -0800 Subject: [PATCH 049/117] Give nested classes their full name. Also updated tests. --- mypy/semanal.py | 5 ++++- mypy/test/data/semanal-classes.test | 4 ++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/mypy/semanal.py b/mypy/semanal.py index 5ba8bd5c6acd..3280d607d33e 100644 --- a/mypy/semanal.py +++ b/mypy/semanal.py @@ -2300,7 +2300,10 @@ def process_nested_classes(self, outer_def: ClassDef) -> None: for node in outer_def.defs.body: if isinstance(node, ClassDef): node.info = TypeInfo(SymbolTable(), node) - node.info._fullname = node.info.name() + if outer_def.fullname: + node.info._fullname = outer_def.fullname + '.' + node.info.name() + else: + node.info._fullname = node.info.name() symbol = SymbolTableNode(MDEF, node.info) outer_def.info.names[node.name] = symbol self.process_nested_classes(node) diff --git a/mypy/test/data/semanal-classes.test b/mypy/test/data/semanal-classes.test index b05c49ac5876..498f5b6f1b17 100644 --- a/mypy/test/data/semanal-classes.test +++ b/mypy/test/data/semanal-classes.test @@ -329,7 +329,7 @@ MypyFile:1( B PassStmt:2()) ExpressionStmt:3( - NameExpr(B [m])))) + NameExpr(B [__main__.A.B])))) [case testClassWithBaseClassWithinClass] class A: @@ -345,7 +345,7 @@ MypyFile:1( ClassDef:3( C BaseType( - B) + __main__.A.B) PassStmt:3()))) [case testDeclarationReferenceToNestedClass] From 0acc794459317cdbde0d3a86b9fa753efb063f92 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 8 Mar 2016 16:08:30 -0800 Subject: [PATCH 050/117] Fix reading source code when falling back to UnprocessedFile in ProbablyCachedFile.process(). --- mypy/build.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index 84746a00d4ef..f16435455fbe 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -899,13 +899,9 @@ def process(self) -> None: file = CacheLoadedFile(self.info(), self.meta, data) else: # Didn't work -- construct an UnprocessedFile. - path, text = read_module_source_from_file(self.id, - self.manager.lib_path, - self.manager.pyversion, - SILENT_IMPORTS in self.manager.flags) + text = read_with_python_encoding(self.path, self.manager.pyversion) # TODO: Errors assert text is not None - assert path == os.path.abspath(self.path), (path, self.path) file = UnprocessedFile(self.info(), text) self.switch_state(file) From 4eb8b257e787f093450bb554c365a134db1a2092 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 8 Mar 2016 16:18:04 -0800 Subject: [PATCH 051/117] Serialize SymbolTableNode.module_public. 
--- mypy/nodes.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/mypy/nodes.py b/mypy/nodes.py index 66bb86da8f05..a57fb4860eff 100644 --- a/mypy/nodes.py +++ b/mypy/nodes.py @@ -1976,6 +1976,8 @@ def serialize(self, prefix: str, name: str) -> JsonDict: } # type: JsonDict if self.tvar_id: data['tvar_id'] = self.tvar_id + if not self.module_public: + data['module_public'] = False if self.kind == MODULE_REF: data['cross_ref'] = self.node.fullname() else: @@ -2012,6 +2014,8 @@ def deserialize(cls, data: JsonDict) -> 'SymbolTableNode': stnode = SymbolTableNode(kind, node, typ=typ) if 'tvar_id' in data: stnode.tvar_id = data['tvar_id'] + if 'module_public' in data: + stnode.module_public = data['module_public'] return stnode @@ -2035,7 +2039,7 @@ def __str__(self) -> str: def serialize(self, fullname: str) -> JsonDict: data = {'.class': 'SymbolTable'} # type: JsonDict for key, value in self.items(): - if key == '__builtins__' or not value.module_public: + if key == '__builtins__': continue data[key] = value.serialize(fullname, key) return data From 64b353a680e0447b420c8516559ba817b912b8d0 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 8 Mar 2016 18:23:04 -0800 Subject: [PATCH 052/117] Use pyversion as part of cache name. --- mypy/build.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index f16435455fbe..d860bd44fd00 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -1380,8 +1380,8 @@ def read_with_python_encoding(path: str, pyversion: Tuple[int, int]) -> str: MYPY_CACHE = '.mypy_cache' -def get_cache_names(id: str, path: str) -> Tuple[str, str]: - prefix = os.path.join(MYPY_CACHE, *id.split('.')) +def get_cache_names(id: str, path: str, pyversion: Tuple[int, int]) -> Tuple[str, str]: + prefix = os.path.join(MYPY_CACHE, '%d.%d' % pyversion, *id.split('.')) is_package = os.path.basename(path).startswith('__init__.py') if is_package: prefix = os.path.join(prefix, '__init__') @@ -1389,7 +1389,7 @@ def get_cache_names(id: str, path: str) -> Tuple[str, str]: def find_cache_meta(id: str, path: str, manager: BuildManager) -> Optional[CacheMeta]: - meta_json, data_json = get_cache_names(id, path) + meta_json, data_json = get_cache_names(id, path, manager.pyversion) manager.log('Finding {} {}'.format(id, data_json)) if not os.path.exists(meta_json): return None @@ -1443,7 +1443,7 @@ def dump_to_json(file: TypeCheckedFile, manager: BuildManager) -> None: st = os.stat(path) # TODO: Errors mtime = st.st_mtime size = st.st_size - meta_json, data_json = get_cache_names(id, path) + meta_json, data_json = get_cache_names(id, path, manager.pyversion) manager.log('Writing {} {} {}'.format(id, meta_json, data_json)) data = file.tree.serialize() parent = os.path.dirname(data_json) From 9b83618f2f261d9df80aac2d1f7eed7c93dab0f0 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 9 Mar 2016 08:21:27 -0800 Subject: [PATCH 053/117] Add --incremental flag (off by default). --- mypy/build.py | 6 +++++- mypy/main.py | 4 ++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/mypy/build.py b/mypy/build.py index d860bd44fd00..76b8e3eb6538 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -58,6 +58,7 @@ FAST_PARSER = 'fast-parser' # Use experimental fast parser # Disallow calling untyped functions from typed ones DISALLOW_UNTYPED_CALLS = 'disallow-untyped-calls' +INCREMENTAL = 'incremental' # Incremental mode: use the cache # State ids. These describe the states a source file / module can be in a # build. 
@@ -603,6 +604,8 @@ def is_module(self, id: str) -> bool: return find_module(id, self.lib_path) is not None def maybe_make_cached_state(self, id: str, path: str) -> Optional['UnprocessedBase']: + if INCREMENTAL not in self.flags: + return None m = find_cache_meta(id, path, self) if m is None: return None @@ -1182,7 +1185,8 @@ def process(self) -> None: # FIX remove from active state list to speed up processing file = TypeCheckedFile(self.info(), self.tree) - dump_to_json(file, self.manager) + if INCREMENTAL in self.manager.flags: + dump_to_json(file, self.manager) self.switch_state(file) def state(self) -> int: diff --git a/mypy/main.py b/mypy/main.py index 062593e5ba68..a7524d67eb44 100644 --- a/mypy/main.py +++ b/mypy/main.py @@ -181,6 +181,9 @@ def process_options(args: List[str]) -> Tuple[List[BuildSource], Options]: elif args[0] == '--disallow-untyped-calls': options.build_flags.append(build.DISALLOW_UNTYPED_CALLS) args = args[1:] + elif args[0] == '--incremental': + options.build_flags.append(build.INCREMENTAL) + args = args[1:] elif args[0] in ('--version', '-V'): ver = True args = args[1:] @@ -316,6 +319,7 @@ def usage(msg: str = None) -> None: -s, --silent-imports don't follow imports to .py files --disallow-untyped-calls disallow calling functions without type annotations from functions with type annotations + --incremental incremental mode: cache type-checking results --implicit-any behave as though all functions were annotated with Any -f, --dirty-stubs don't warn if typeshed is out of sync --pdb invoke pdb on fatal error From 2ffbbc3429d97afe490eb1228d894f8f5f02b568 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 9 Mar 2016 15:09:34 -0800 Subject: [PATCH 054/117] TypeVarDef.values is actually Optional. --- mypy/fixup.py | 5 +++-- mypy/types.py | 8 ++++---- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/mypy/fixup.py b/mypy/fixup.py index acd2363c576b..0a13223493e6 100644 --- a/mypy/fixup.py +++ b/mypy/fixup.py @@ -173,8 +173,9 @@ def visit_callable_type(self, ct: CallableType) -> None: if ct.ret_type is not None: ct.ret_type.accept(self) for v in ct.variables: - for val in v.values: - val.accept(self) + if v.values: + for val in v.values: + val.accept(self) v.upper_bound.accept(self) for i, t in ct.bound_vars: t.accept(self) diff --git a/mypy/types.py b/mypy/types.py index e78105aa1090..b84ebe1b6133 100644 --- a/mypy/types.py +++ b/mypy/types.py @@ -48,12 +48,12 @@ class TypeVarDef(mypy.nodes.Context): name = '' id = 0 - values = None # type: List[Type] + values = None # type: Optional[List[Type]] upper_bound = None # type: Type variance = INVARIANT # type: int line = 0 - def __init__(self, name: str, id: int, values: List[Type], + def __init__(self, name: str, id: int, values: Optional[List[Type]], upper_bound: Type, variance: int = INVARIANT, line: int = -1) -> None: self.name = name self.id = id @@ -75,7 +75,7 @@ def serialize(self) -> JsonDict: return {'.class': 'TypeVarDef', 'name': self.name, 'id': self.id, - 'values': [v.serialize() for v in self.values], + 'values': None if self.values is None else [v.serialize() for v in self.values], 'upper_bound': self.upper_bound.serialize(), 'variance': self.variance, } @@ -85,7 +85,7 @@ def deserialize(cls, data: JsonDict) -> 'TypeVarDef': assert data['.class'] == 'TypeVarDef' return TypeVarDef(data['name'], data['id'], - [Type.deserialize(v) for v in data['values']], + None if data['values'] is None else [Type.deserialize(v) for v in data['values']], Type.deserialize(data['upper_bound']), 
data['variance'], ) From 7a3d90cecc8c3d928c3441695747ca7470c30511 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 9 Mar 2016 16:03:27 -0800 Subject: [PATCH 055/117] Do not serialize ClassDef.decorators. --- mypy/nodes.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/mypy/nodes.py b/mypy/nodes.py index a57fb4860eff..fc67dd1da33f 100644 --- a/mypy/nodes.py +++ b/mypy/nodes.py @@ -638,7 +638,6 @@ def serialize(self) -> JsonDict: 'type_vars': [v.serialize() for v in self.type_vars], 'base_types': [t.serialize() for t in self.base_types], 'metaclass': self.metaclass, - 'decorators': [d.serialize() for d in self.decorators], 'is_builtinclass': self.is_builtinclass, } @@ -652,7 +651,6 @@ def deserialize(self, data: JsonDict) -> 'ClassDef': ) res.fullname = data['fullname'] res.base_types = [mypy.types.Instance.deserialize(t) for t in data['base_types']] - res.decorators = [Node.deserialize(d) for d in data['decorators']] res.is_builtinclass = data['is_builtinclass'] return res From 40152a47800b7a17447704d5049254ae4098221e Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Thu, 10 Mar 2016 12:41:13 -0800 Subject: [PATCH 056/117] Store a reference to the modules dict on the BuildManager. --- mypy/build.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index 76b8e3eb6538..7661cb4e4570 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -365,6 +365,7 @@ class BuildManager: data_dir: Mypy data directory (contains stubs) target: Build target; selects which passes to perform lib_path: Library path for looking up modules + modules: Mapping of module ID to MypyFile semantic_analyzer: Semantic analyzer, pass 2 semantic_analyzer_pass3: @@ -408,10 +409,10 @@ def __init__(self, data_dir: str, self.reports = reports self.semantic_analyzer = SemanticAnalyzer(lib_path, self.errors, pyversion=pyversion) - modules = self.semantic_analyzer.modules - self.semantic_analyzer_pass3 = ThirdPass(modules, self.errors) + self.modules = self.semantic_analyzer.modules + self.semantic_analyzer_pass3 = ThirdPass(self.modules, self.errors) self.type_checker = TypeChecker(self.errors, - modules, + self.modules, self.pyversion, DISALLOW_UNTYPED_CALLS in self.flags) self.states = [] # type: List[State] @@ -479,8 +480,7 @@ def process(self, initial_states: List['UnprocessedBase']) -> BuildResult: if self.errors.is_errors(): self.errors.raise_error() - return BuildResult(self.semantic_analyzer.modules, - self.type_checker.type_map) + return BuildResult(self.modules, self.type_checker.type_map) def next_available_state(self) -> 'State': """Find a ready state (one that has all its dependencies met).""" @@ -928,7 +928,7 @@ def __init__(self, info: StateInfo, meta: CacheMeta, data: Any) -> None: self.tree = MypyFile.deserialize(data) # Store the parsed module in the shared module symbol table. 
- self.semantic_analyzer().modules[self.id] = self.tree + self.manager.modules[self.id] = self.tree def is_ready(self) -> bool: """Return True if all dependencies are at least in the same state @@ -947,7 +947,7 @@ def is_ready(self) -> bool: def process(self) -> None: """Patch up cross-references and Transition to CachePatchedFile.""" self.manager.log('FIXING MODULE PASS ONE {}'.format(self.id)) - fixup.fixup_module_pass_one(self.tree, self.semantic_analyzer().modules) + fixup.fixup_module_pass_one(self.tree, self.manager.modules) file = CachePatchedFile(self.info(), self.tree, self.meta) self.switch_state(file) @@ -985,7 +985,7 @@ def is_ready(self) -> bool: def process(self) -> None: """Calculate all MROs and transition to CacheWithMroFile.""" self.manager.log('FIXING MODULE PASS TWO {}'.format(self.id)) - fixup.fixup_module_pass_two(self.tree, self.semantic_analyzer().modules) + fixup.fixup_module_pass_two(self.tree, self.manager.modules) file = CacheWithMroFile(self.info(), self.tree, self.meta) self.switch_state(file) @@ -1026,22 +1026,22 @@ def load_dependencies(self): def process(self) -> None: """Parse the file, store global names and advance to the next state.""" - if self.id in self.semantic_analyzer().modules: + if self.id in self.manager.modules: self.fail(self.path, 1, "Duplicate module named '{}'".format(self.id)) return tree = self.parse(self.program_text, self.path) # Store the parsed module in the shared module symbol table. - self.semantic_analyzer().modules[self.id] = tree + modules = self.manager.modules + modules[self.id] = tree if '.' in self.id: # Include module in the symbol table of the enclosing package. c = self.id.split('.') p = '.'.join(c[:-1]) - sem_anal = self.semantic_analyzer() - if p in sem_anal.modules: - sem_anal.modules[p].names[c[-1]] = SymbolTableNode(MODULE_REF, tree, p) + if p in modules: + modules[p].names[c[-1]] = SymbolTableNode(MODULE_REF, tree, p) if self.id != 'builtins': # The builtins module is imported implicitly in every program (it @@ -1487,7 +1487,7 @@ def dump_to_json(file: TypeCheckedFile, manager: BuildManager) -> None: new_keys = sorted(new_names) print('Fixing up', file.id) - fixup.fixup_module_pass_one(new_tree, file.semantic_analyzer().modules) + fixup.fixup_module_pass_one(new_tree, manager.modules) print('Comparing keys', file.id) old_tree = file.tree @@ -1504,7 +1504,7 @@ def dump_to_json(file: TypeCheckedFile, manager: BuildManager) -> None: print(' Old key', key, 'not found in new tree') print('Comparing values', file.id) - modules = file.semantic_analyzer().modules + modules = manager.modules for key in old_keys: if key not in new_keys: continue From 492691560773a597b29f7900ade7655594197841 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Thu, 10 Mar 2016 13:43:26 -0800 Subject: [PATCH 057/117] Refactor dump_to_json(). 
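This just splits the file-writing part out of dump_to_json() so it can be called with an
explicit id, path, and tree.  For orientation, write_cache() produces two files per
module; roughly (illustrative paths and values only):

    # For module 'foo.bar' checked as Python 3.5, the cache lives at:
    #   .mypy_cache/3.5/foo/bar.meta.json   -- metadata, mirrors CacheMeta
    #   .mypy_cache/3.5/foo/bar.data.json   -- serialized MypyFile tree
    # and the meta record looks like:
    meta = {
        'id': 'foo.bar',
        'path': '/repo/foo/bar.py',      # absolute source path (made up)
        'mtime': 1457500000.0,           # st_mtime of the source file
        'size': 1234,                    # st_size of the source file
        'data_mtime': 1457500100.0,      # st_mtime of bar.data.json
        'dependencies': ['builtins', 'foo'],
    }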
--- mypy/build.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index 7661cb4e4570..130ee3d4d9ca 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -1442,6 +1442,11 @@ def dump_to_json(file: TypeCheckedFile, manager: BuildManager) -> None: path = file.path if path == '': return + write_cache(id, path, file.tree, file.dependencies, manager) + + +def write_cache(id: str, path: str, tree: MypyFile, dependencies: List[str], + manager: BuildManager) -> None: path = os.path.abspath(path) manager.log('Dumping {} to {}'.format(id, path)) st = os.stat(path) # TODO: Errors @@ -1449,7 +1454,7 @@ def dump_to_json(file: TypeCheckedFile, manager: BuildManager) -> None: size = st.st_size meta_json, data_json = get_cache_names(id, path, manager.pyversion) manager.log('Writing {} {} {}'.format(id, meta_json, data_json)) - data = file.tree.serialize() + data = tree.serialize() parent = os.path.dirname(data_json) if not os.path.isdir(parent): os.makedirs(parent) @@ -1466,7 +1471,7 @@ def dump_to_json(file: TypeCheckedFile, manager: BuildManager) -> None: 'mtime': mtime, 'size': size, 'data_mtime': data_mtime, - 'dependencies': file.dependencies, + 'dependencies': dependencies, } with open(meta_json_tmp, 'w') as f: json.dump(meta, f, sort_keys=True) @@ -1478,7 +1483,7 @@ def dump_to_json(file: TypeCheckedFile, manager: BuildManager) -> None: # Now, as a test, read it back. print() - print('Reading what we wrote for', file.id, 'from', data_json) + print('Reading what we wrote for', id, 'from', data_json) with open(data_json, 'r') as f: new_data = json.load(f) assert new_data == data @@ -1486,11 +1491,11 @@ def dump_to_json(file: TypeCheckedFile, manager: BuildManager) -> None: new_names = new_tree.names new_keys = sorted(new_names) - print('Fixing up', file.id) + print('Fixing up', id) fixup.fixup_module_pass_one(new_tree, manager.modules) - print('Comparing keys', file.id) - old_tree = file.tree + print('Comparing keys', id) + old_tree = tree old_names = old_tree.names old_keys = sorted(old_names) if new_keys != old_keys: @@ -1503,7 +1508,7 @@ def dump_to_json(file: TypeCheckedFile, manager: BuildManager) -> None: if key != '__builtins__' and v.module_public: print(' Old key', key, 'not found in new tree') - print('Comparing values', file.id) + print('Comparing values', id) modules = manager.modules for key in old_keys: if key not in new_keys: @@ -1511,7 +1516,7 @@ def dump_to_json(file: TypeCheckedFile, manager: BuildManager) -> None: oldv = old_names[key] newv = new_names[key] if newv.mod_id != oldv.mod_id: - newv.mod_id = file.id # XXX Hack + newv.mod_id = id # XXX Hack if newv.kind == MODULE_REF and newv.node is None: fn = oldv.node.fullname() if fn in modules: From 2515bd1fd203b05f90cf5ec5d1d2e1e0621d0610 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 9 Mar 2016 22:06:36 -0800 Subject: [PATCH 058/117] New dependency manager. For now, only used when --incremental given. 
--- mypy/build.py | 5 + mypy/depmgr.py | 451 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 456 insertions(+) create mode 100644 mypy/depmgr.py diff --git a/mypy/build.py b/mypy/build.py index 130ee3d4d9ca..d6b4305e35d2 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -208,6 +208,11 @@ def build(sources: List[BuildSource], implicit_any=implicit_any, reports=reports) + if INCREMENTAL in flags: + from .depmgr import dispatch + dispatch(sources, manager) + return BuildResult(manager.modules, manager.type_checker.type_map) + # Construct information that describes the initial files. __main__ is the # implicit module id and the import context is empty initially ([]). initial_states = [] # type: List[UnprocessedBase] diff --git a/mypy/depmgr.py b/mypy/depmgr.py new file mode 100644 index 000000000000..157b2d3dca36 --- /dev/null +++ b/mypy/depmgr.py @@ -0,0 +1,451 @@ +"""Dependency manager. + +This will replace the dependency management in build.py. + +Design +====== + +Ideally +------- + +A. Collapse cycles (each SCC -- strongly connected component -- + becomes one "supernode"). + +B. Topologically sort nodes based on dependencies. + +C. Process from leaves towards roots. + +Wrinkles +-------- + +a. Need to parse source modules to determine dependencies. + +b. Import cycles. + +c. Must order mtimes of files to decide whether to re-process; depends + on clock never resetting. + +d. from P import M; checks filesystem whether module P.M exists in + filesystem. + +e. Race conditions, where somebody modifies a file while we're + processing. I propose not to modify the algorithm to handle this, + but to detect when this could lead to inconsistencies. (For + example, when we decide on the dependencies based on cache + metadata, and then we decide to re-parse a file because of a stale + dependency, if the re-parsing leads to a different list of + dependencies we should warn the user or start over.) + +Steps +----- + +1. For each explicitly given module find the source file location. + +2. For each such module load and check the cache metadata, and decide + whether it's valid. + +3. Now recursively (or iteratively) find dependencies and add those to + the graph: + + - for cached nodes use the list of dependencies from the cache + metadata; + + - for uncached nodes parse the file and process all imports found, + taking care of (a) above. + +Step 3 should also address (d) above. + +Once step 3 terminates we have the entire dependency graph, and for +each module we've either loaded the cache metadata or parsed the +source code. (However, we may still need to parse those modules for +which we have cache metadata but that depend, directly or indirectly, +on at least one module for which the cache metadata is stale.) + +Now we can execute steps A-C from the first section. Finding SCCs for +step A shouldn't be hard; there's a recipe here: +http://code.activestate.com/recipes/578507/. + +For single nodes, processing is simple. If the node was cached, we +deserialize the cache data and fix up cross-references. Otherwise, we +do semantic analysis followed by type checking. We also handle (c) +above; if a module has valid cache data *but* any of its +dependendencies was processed from source, then the module should be +processed from source. 
+ +A relatively simple optimization (outside SCCs) is as follows: if a +node's cache data is valid, but one or more of its dependencies are +out of date so we have to re-parse the node from source, once we have +fully type-checked the node, we can decide whether its symbol table +actually changed compared to the cache data (by reading the cache data +and comparing it to the data we would be writing). If there is no +change we can declare the node up to date, and any node that depends +(and for which we have cached data, and whose other dependencies are +up to date) on it won't need to be re-parsed from source. + +Import cycles +------------- + +Finally we have to decide how to handle (c), import cycles. Here +we'll need a modified version of the original state machine +(build.py), but we only need to do this per SCC, and we won't have to +deal with changes to the list of nodes while we're processing it. + +If all nodes in the SCC have valid cache metadata and all dependencies +outside the SCC are still valid, we can proceed as follows: + + 1. Load cache data for all nodesin the SCC. + + 2. Fix up cross-references for all nodes in the SCC. + +Otherwise, the simplest (but potentially slow) way to proceed is to +invalidate all cache data in the SCC and re-parse all nodes in the SCC +from source. We can do this as follows: + + 1. Parse source for all nodes in the SCC. + + 2. Semantic analysis for all nodes in the SCC. + + 3. Type check all nodes in the SCC. + +(If there are more passes the process is the same -- each pass should +be done for all nodes before starting the next pass for any nodes in +the SCC.) + +We could process the nodes in the SCC in any order. We *might* +process them in the reverse order in which we encountered them when +originally constructing the graph (IIUC that's how the old build.py +deals with cycles). + +Can we do better than re-parsing all nodes in the SCC when any of its +dependencies are out of date? It's doubtful. The optimization +mentioned at the end of the previous section would require re-parsing +and type-checking a node and then comparing its symbol table to the +cached data; but because the node is part of a cycle we can't +technically type-check it until the semantic analysis of all other +nodes in the cycle has completed. (This is an important issue because +we have a cycle of over 500 modules in the server repo. But I'd like +to deal with it later.) + +""" + +import json +import os + +from typing import Any, Dict, List, Set, AbstractSet, Iterable, Iterator, Optional, TypeVar + +from .build import (BuildManager, BuildSource, CacheMeta, FAST_PARSER, + find_cache_meta, find_module, read_with_python_encoding, + write_cache) +from .errors import CompileError +from .fixup import fixup_module_pass_one, fixup_module_pass_two +from .nodes import MypyFile, SymbolTableNode, MODULE_REF +from .parse import parse +from .semanal import FirstPass + + +class State: + """The state for a module. + + It's a package if path ends in __init__.py[i]. + + The source is only used for the -c command line option; in that + case path is None. Otherwise source is None and path isn't. 
+ """ + + manager = None # type: BuildManager + id = None # type: str # Fully qualified module name + path = None # type: Optional[str] # Path to module source + xpath = None # type: str # Path or '' + meta = None # type: Optional[CacheMeta] + data = None # type: Optional[str] + tree = None # type: Optional[MypyFile] + dependencies = None # type: Optional[Set[str]] + + def __init__(self, + id: Optional[str], + path: Optional[str], + source: Optional[str], + manager: BuildManager, + ) -> None: + assert id or path or source, "Neither id, path nor source given" + self.manager = manager + self.id = id or '__main__' + if not path and not source: + path = find_module(id, manager.lib_path) + if not path: + raise CompileError(["mypy: can't find module '%s'" % id]) + self.path = path + self.xpath = path or '' + self.source = source + if path: + self.meta = find_cache_meta(self.id, self.path, manager) + # TODO: Get mtime if not cached. + if self.meta: + self.dependencies = set(self.meta.dependencies) + else: + self.parse_file() + + def is_fresh(self) -> bool: + return self.meta is not None + + def clear_fresh(self) -> None: + self.meta = None + + # Methods for processing cached modules. + + def load_tree(self) -> None: + with open(self.meta.data_json) as f: + data = json.load(f) + # TODO: Assert data file wasn't changed. + self.tree = MypyFile.deserialize(data) + self.manager.modules[self.id] = self.tree + + def fix_cross_refs(self) -> None: + fixup_module_pass_one(self.tree, self.manager.modules) + + def calculate_mros(self) -> None: + fixup_module_pass_two(self.tree, self.manager.modules) + + # Methods for processing modules from source code. + + def parse_file(self) -> None: + if self.tree is not None: + return + + manager = self.manager + modules = manager.modules + + if not self.source: + self.source = read_with_python_encoding(self.path, manager.pyversion) + self.tree = parse_file(self.id, self.path, self.source, manager) + modules[self.id] = self.tree + + if self.tree and '.' in self.id: + # Include module in the symbol table of the enclosing package. + parent, child = self.id.rsplit('.', 1) + if parent in modules: + modules[parent].names[child] = SymbolTableNode(MODULE_REF, self.tree, parent) + + # First pass of semantic analysis is needed before adding dependencies. + first = FirstPass(manager.semantic_analyzer) + first.analyze(self.tree, self.xpath, self.id) + self.tree.names = manager.semantic_analyzer.globals + + # Compute dependencies. + dependencies = set() + aid = self.id + while '.' in aid: + aid, _ = aid.rsplit('.', 1) + dependencies.add(aid) + if self.id != 'builtins': + dependencies.add('builtins') + dependencies.update(id for id, _ in manager.all_imported_modules_in_file(self.tree)) + + if self.dependencies is not None and dependencies != self.dependencies: + print("HELP!! Dependencies changed!") # Probably the file was edited. 
+ print(" Cached:", self.dependencies) + print(" Source:", dependencies) + self.dependencies = dependencies + + def semantic_analysis(self) -> None: + self.manager.semantic_analyzer.visit_file(self.tree, self.xpath) + + def semantic_analysis_pass_three(self) -> None: + self.manager.semantic_analyzer_pass3.visit_file(self.tree, self.xpath) + # TODO: DUMP_TYPE_STATS + + def type_check(self) -> None: + self.manager.type_checker.visit_file(self.tree, self.xpath) + # TODO: DUMP_INFER_STATS, manager.reports.file() + + def write_cache(self) -> None: + if self.path: + write_cache(self.id, self.path, self.tree, list(self.dependencies), self.manager) + + +# TODO: This would make a nice method on BuildManager. +def parse_file(id: str, path: str, source: str, manager: BuildManager) -> MypyFile: + """Parse the source of a file with the given name. + + Raise CompileError if there is a parse error. + """ + errors = manager.errors + num_errs = errors.num_messages() + tree = parse(source, path, errors, + pyversion=manager.pyversion, + custom_typing_module=manager.custom_typing_module, + implicit_any=manager.implicit_any, + fast_parser=FAST_PARSER in manager.flags) + tree._fullname = id + if errors.num_messages() != num_errs: + errors.raise_error() + return tree + + +Graph = Dict[str, State] + + +def dispatch(sources: List[BuildSource], manager: BuildManager) -> None: + manager.log("Using new dependency manager") + graph = load_graph(sources, manager) + manager.log("Loaded graph with %d nodes" % len(graph)) + process_graph(graph, manager) + + +def load_graph(sources: List[BuildSource], manager: BuildManager) -> Graph: + """Given some source files, load the full dependency graph.""" + graph = {} # type: Graph + # Seed graph with roots. + for bs in sources: + st = State(bs.module, bs.path, bs.text, manager) + assert st.id not in graph, "TODO: Duplicate module %s" % st.id + graph[st.id] = st + # Collect dependencies. + while True: + new = {} # type: Graph + for st in graph.values(): + for dep in st.dependencies: + if dep not in graph and dep not in new: + # TODO: Implement --silent-imports. + depst = State(dep, None, None, manager) + assert depst.id not in new, "TODO: This is bad %s" % depst.id + new[depst.id] = depst + if not new: + break + graph.update(new) + return graph + + +def process_graph(graph: Graph, manager: BuildManager) -> None: + """Process everyhing in dependency order.""" + sccs = sorted_components(graph) + manager.log("Found %d SCCs" % len(sccs)) + for scc in sccs: + manager.trace("Processing SCC of size %d (%s)" % (len(scc), " ".join(sorted(scc)))) + # TODO: Do something about mtime ordering. + fresh = all(graph[id].is_fresh() for id in scc) + if fresh: + manager.trace(" Looks fresh...") + deps = set() + for id in scc: + deps.update(graph[id].dependencies) + deps -= scc + fresh = all(graph[id].is_fresh() for id in deps) + if fresh: + manager.trace(" Processing as fresh") + process_fresh_scc(graph, scc) + else: + manager.trace(" *** Processing as stale ***") + process_stale_scc(graph, scc) + + +def process_fresh_scc(graph: Graph, scc: AbstractSet[str]) -> None: + """Process the modules in one SCC from their cached data.""" + for id in scc: + graph[id].load_tree() + for id in scc: + graph[id].fix_cross_refs() + for id in scc: + graph[id].calculate_mros() + + +def process_stale_scc(graph: Graph, ascc: AbstractSet[str]) -> None: + """Process the modules in one SCC from source code.""" + if ascc == {'abc', 'typing', 'builtins'}: + # Hack: typing must be processed before builtins. TODO: Why? 
+ scc = ['abc', 'typing', 'builtins'] + else: + scc = sorted(ascc) # Sort for reproducibility. TODO: Why? + for id in scc: + graph[id].clear_fresh() + for id in scc: + # We may already have parsed the module, or not. + graph[id].parse_file() + for id in scc: + graph[id].semantic_analysis() + for id in scc: + graph[id].semantic_analysis_pass_three() + for id in scc: + graph[id].type_check() + # TODO: Only if --incremental. + graph[id].write_cache() + + +# TODO: Use FrozenSet[T]. +def sorted_components(graph: Graph) -> List[AbstractSet[str]]: + """Return the graph's SCCs, topologically sorted by dependencies.""" + # Compute SCCs. + vertices = set(graph) + edges = {id: st.dependencies for id, st in graph.items()} + sccs = list(strongly_connected_components_path(vertices, edges)) + # Topsort. + sccsmap = {id: frozenset(scc) for scc in sccs for id in scc} + data = {} + for scc in sccs: + deps = set() # type: Set[frozenset] + for id in scc: + deps.update(sccsmap[x] for x in graph[id].dependencies) + data[frozenset(scc)] = deps + return list(topsort(data)) + + +# TODO: Use TypeVar T instead of str. +def strongly_connected_components_path(vertices: Set[str], + edges: Dict[str, Set[str]]) -> Iterator[Set[str]]: + """Compute Strongly Connected Components of a graph. + + From http://code.activestate.com/recipes/578507/. + """ + identified = set() # type: Set[str] + stack = [] # type: List[str] + index = {} # type: Dict[str, int] + boundaries = [] # type: List[int] + + def dfs(v: str) -> Iterator[Set[str]]: + index[v] = len(stack) + stack.append(v) + boundaries.append(index[v]) + + for w in edges[v]: + if w not in index: + # For Python >= 3.3, replace with "yield from dfs(w)" + for scc in dfs(w): + yield scc + elif w not in identified: + while index[w] < boundaries[-1]: + boundaries.pop() + + if boundaries[-1] == index[v]: + boundaries.pop() + scc = set(stack[index[v]:]) + del stack[index[v]:] + identified.update(scc) + yield scc + + for v in vertices: + if v not in index: + # For Python >= 3.3, replace with "yield from dfs(v)" + for scc in dfs(v): + yield scc + + +# TODO: Use FrozenSet[T or str] instead of frozenset. +def topsort(data: Dict[frozenset, Set[frozenset]]) -> Iterable[frozenset]: + """Topological sort. Consumes its argument. + + From http://code.activestate.com/recipes/577413/. + """ + for k, v in data.items(): + v.discard(k) # Ignore self dependencies. + for item in set.union(*data.values()) - set(data.keys()): + data[item] = set() + while True: + ready = {item for item, dep in data.items() if not dep} + if not ready: + break + for item in ready: + yield item + data = {item: (dep - ready) + for item, dep in data.items() + if item not in ready} + assert not data, "A cyclic dependency exists amongst %r" % data From 39261a9ca224986d596f08d4e2f7a45b9442c2af Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Thu, 10 Mar 2016 19:01:58 -0800 Subject: [PATCH 059/117] Clarify/fix big comment block, add some comments, minor cleanup. --- mypy/depmgr.py | 73 +++++++++++++++++++++++++++++++++----------------- 1 file changed, 49 insertions(+), 24 deletions(-) diff --git a/mypy/depmgr.py b/mypy/depmgr.py index 157b2d3dca36..c4df55744da5 100644 --- a/mypy/depmgr.py +++ b/mypy/depmgr.py @@ -20,7 +20,7 @@ a. Need to parse source modules to determine dependencies. -b. Import cycles. +b. Processing order for modules within an SCC. c. Must order mtimes of files to decide whether to re-process; depends on clock never resetting. 
@@ -48,7 +48,8 @@ the graph: - for cached nodes use the list of dependencies from the cache - metadata; + metadata (this will be valid even if we later end up re-parsing + the same source); - for uncached nodes parse the file and process all imports found, taking care of (a) above. @@ -63,7 +64,8 @@ Now we can execute steps A-C from the first section. Finding SCCs for step A shouldn't be hard; there's a recipe here: -http://code.activestate.com/recipes/578507/. +http://code.activestate.com/recipes/578507/. There's also a plethora +of topsort recipes, e.g. http://code.activestate.com/recipes/577413/. For single nodes, processing is simple. If the node was cached, we deserialize the cache data and fix up cross-references. Otherwise, we @@ -72,15 +74,16 @@ dependendencies was processed from source, then the module should be processed from source. -A relatively simple optimization (outside SCCs) is as follows: if a -node's cache data is valid, but one or more of its dependencies are -out of date so we have to re-parse the node from source, once we have -fully type-checked the node, we can decide whether its symbol table -actually changed compared to the cache data (by reading the cache data -and comparing it to the data we would be writing). If there is no -change we can declare the node up to date, and any node that depends -(and for which we have cached data, and whose other dependencies are -up to date) on it won't need to be re-parsed from source. +A relatively simple optimization (outside SCCs) we might do in the +future is as follows: if a node's cache data is valid, but one or more +of its dependencies are out of date so we have to re-parse the node +from source, once we have fully type-checked the node, we can decide +whether its symbol table actually changed compared to the cache data +(by reading the cache data and comparing it to the data we would be +writing). If there is no change we can declare the node up to date, +and any node that depends (and for which we have cached data, and +whose other dependencies are up to date) on it won't need to be +re-parsed from source. Import cycles ------------- @@ -93,7 +96,7 @@ If all nodes in the SCC have valid cache metadata and all dependencies outside the SCC are still valid, we can proceed as follows: - 1. Load cache data for all nodesin the SCC. + 1. Load cache data for all nodes in the SCC. 2. Fix up cross-references for all nodes in the SCC. @@ -114,7 +117,7 @@ We could process the nodes in the SCC in any order. We *might* process them in the reverse order in which we encountered them when originally constructing the graph (IIUC that's how the old build.py -deals with cycles). +deals with cycles). For now we'll process them in alphabetical order. Can we do better than re-parsing all nodes in the SCC when any of its dependencies are out of date? It's doubtful. The optimization @@ -156,6 +159,7 @@ class State: id = None # type: str # Fully qualified module name path = None # type: Optional[str] # Path to module source xpath = None # type: str # Path or '' + source = None # type: Optional[str] # Module source code meta = None # type: Optional[CacheMeta] data = None # type: Optional[str] tree = None # type: Optional[MypyFile] @@ -178,11 +182,13 @@ def __init__(self, self.xpath = path or '' self.source = source if path: + # TODO: Only if --incremental. self.meta = find_cache_meta(self.id, self.path, manager) # TODO: Get mtime if not cached. 
if self.meta: self.dependencies = set(self.meta.dependencies) else: + # Parse the file (and then some) to get the dependencies. self.parse_file() def is_fresh(self) -> bool: @@ -210,6 +216,7 @@ def calculate_mros(self) -> None: def parse_file(self) -> None: if self.tree is not None: + # The file was already parsed (in __init__()). return manager = self.manager @@ -218,30 +225,46 @@ def parse_file(self) -> None: if not self.source: self.source = read_with_python_encoding(self.path, manager.pyversion) self.tree = parse_file(self.id, self.path, self.source, manager) + self.source = None # We won't need it again. modules[self.id] = self.tree if self.tree and '.' in self.id: # Include module in the symbol table of the enclosing package. + # TODO: This is weirdly optional; why is it needed? parent, child = self.id.rsplit('.', 1) if parent in modules: modules[parent].names[child] = SymbolTableNode(MODULE_REF, self.tree, parent) - # First pass of semantic analysis is needed before adding dependencies. + # Do the first pass of semantic analysis: add top-level + # definitions in the file to the symbol table. We must do + # this before processing imports, since this may mark some + # import statements as unreachable. first = FirstPass(manager.semantic_analyzer) first.analyze(self.tree, self.xpath, self.id) + + # Initialize module symbol table, which was populated by the + # semantic analyzer. TODO: can't FirstPass .analyze() do this? self.tree.names = manager.semantic_analyzer.globals # Compute dependencies. dependencies = set() - aid = self.id - while '.' in aid: - aid, _ = aid.rsplit('.', 1) - dependencies.add(aid) + # Start with parent packages. + parent = self.id + while '.' in parent: + parent, _ = parent.rsplit('.', 1) + dependencies.add(parent) + # Every module implicitly depends on builtins. if self.id != 'builtins': dependencies.add('builtins') + # Add all direct imports (this is why we needed the first pass). dependencies.update(id for id, _ in manager.all_imported_modules_in_file(self.tree)) + # If self.dependencies is already set, it was read from the + # cache, but for some reason we're re-parsing the file. + # Double-check that the dependencies still match (otherwise + # the graph is out of date). if self.dependencies is not None and dependencies != self.dependencies: + # TODO: Make this into a reasonable error message. print("HELP!! Dependencies changed!") # Probably the file was edited. print(" Cached:", self.dependencies) print(" Source:", dependencies) @@ -371,7 +394,7 @@ def process_stale_scc(graph: Graph, ascc: AbstractSet[str]) -> None: graph[id].write_cache() -# TODO: Use FrozenSet[T]. +# TODO: Use TypeVar T instead of str. def sorted_components(graph: Graph) -> List[AbstractSet[str]]: """Return the graph's SCCs, topologically sorted by dependencies.""" # Compute SCCs. @@ -380,9 +403,9 @@ def sorted_components(graph: Graph) -> List[AbstractSet[str]]: sccs = list(strongly_connected_components_path(vertices, edges)) # Topsort. sccsmap = {id: frozenset(scc) for scc in sccs for id in scc} - data = {} + data = {} # type: Dict[AbstractSet[str], Set[AbstractSet[str]]] for scc in sccs: - deps = set() # type: Set[frozenset] + deps = set() # type: Set[AbstractSet[str]] for id in scc: deps.update(sccsmap[x] for x in graph[id].dependencies) data[frozenset(scc)] = deps @@ -429,12 +452,13 @@ def dfs(v: str) -> Iterator[Set[str]]: yield scc -# TODO: Use FrozenSet[T or str] instead of frozenset. 
-def topsort(data: Dict[frozenset, Set[frozenset]]) -> Iterable[frozenset]: +# TODO: Use TypeVar T instead of str. +def topsort(data: Dict[AbstractSet[str], Set[AbstractSet[str]]]) -> Iterable[AbstractSet[str]]: """Topological sort. Consumes its argument. From http://code.activestate.com/recipes/577413/. """ + # TODO: Use a faster algorithm? for k, v in data.items(): v.discard(k) # Ignore self dependencies. for item in set.union(*data.values()) - set(data.keys()): @@ -443,6 +467,7 @@ def topsort(data: Dict[frozenset, Set[frozenset]]) -> Iterable[frozenset]: ready = {item for item, dep in data.items() if not dep} if not ready: break + # TODO: Return the items in a reproducible order. for item in ready: yield item data = {item: (dep - ready) From 0b891aff4f95a602ffbd8b713f8a6ec3c633679d Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Thu, 10 Mar 2016 21:29:42 -0800 Subject: [PATCH 060/117] Honor --incremental in depmgr. --- mypy/depmgr.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/mypy/depmgr.py b/mypy/depmgr.py index c4df55744da5..79cb6e660ef2 100644 --- a/mypy/depmgr.py +++ b/mypy/depmgr.py @@ -136,7 +136,8 @@ from typing import Any, Dict, List, Set, AbstractSet, Iterable, Iterator, Optional, TypeVar -from .build import (BuildManager, BuildSource, CacheMeta, FAST_PARSER, +from .build import (BuildManager, BuildSource, CacheMeta, + INCREMENTAL, FAST_PARSER, SILENT_IMPORTS, find_cache_meta, find_module, read_with_python_encoding, write_cache) from .errors import CompileError @@ -181,8 +182,7 @@ def __init__(self, self.path = path self.xpath = path or '' self.source = source - if path: - # TODO: Only if --incremental. + if path and INCREMENTAL in manager.flags: self.meta = find_cache_meta(self.id, self.path, manager) # TODO: Get mtime if not cached. if self.meta: @@ -282,7 +282,7 @@ def type_check(self) -> None: # TODO: DUMP_INFER_STATS, manager.reports.file() def write_cache(self) -> None: - if self.path: + if self.path and INCREMENTAL in self.manager.flags: write_cache(self.id, self.path, self.tree, list(self.dependencies), self.manager) @@ -390,7 +390,6 @@ def process_stale_scc(graph: Graph, ascc: AbstractSet[str]) -> None: graph[id].semantic_analysis_pass_three() for id in scc: graph[id].type_check() - # TODO: Only if --incremental. graph[id].write_cache() From 4be5af6cc9a8ffcbb75fb036df4a1cbf87c1088b Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 11 Mar 2016 08:14:50 -0800 Subject: [PATCH 061/117] Tweak some log/trace calls. Add relative time to log/trace output. --- mypy/build.py | 61 ++++++++++++++++++++++++++++----------------------- 1 file changed, 33 insertions(+), 28 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index d6b4305e35d2..6ae87013d13b 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -17,6 +17,7 @@ import shlex import subprocess import sys +import time import re from os.path import dirname, basename @@ -208,30 +209,33 @@ def build(sources: List[BuildSource], implicit_any=implicit_any, reports=reports) - if INCREMENTAL in flags: + if INCREMENTAL in flags and VERBOSE in flags: from .depmgr import dispatch dispatch(sources, manager) - return BuildResult(manager.modules, manager.type_checker.type_map) - - # Construct information that describes the initial files. __main__ is the - # implicit module id and the import context is empty initially ([]). 
- initial_states = [] # type: List[UnprocessedBase] - for source in sources: - initial_state = None # type: Optional[UnprocessedBase] - if source.module != '__main__' and source.path is not None: - initial_state = manager.maybe_make_cached_state(source.module, source.path) - # TODO: else if using '-m x' try the cache too - if initial_state is None: - content = source.load(lib_path, pyversion) - info = StateInfo(source.effective_path, source.module, [], manager) - initial_state = UnprocessedFile(info, content) - initial_states.append(initial_state) - - # Perform the build by sending the files as new file (UnprocessedFile is the - # initial state of all files) to the manager. The manager will process the - # file and all dependant modules recursively. - result = manager.process(initial_states) + result = BuildResult(manager.modules, manager.type_checker.type_map) + else: + # Construct information that describes the initial files. __main__ is the + # implicit module id and the import context is empty initially ([]). + initial_states = [] # type: List[UnprocessedBase] + for source in sources: + initial_state = None # type: Optional[UnprocessedBase] + if source.module != '__main__' and source.path is not None: + initial_state = manager.maybe_make_cached_state(source.module, source.path) + # TODO: else if using '-m x' try the cache too + if initial_state is None: + content = source.load(lib_path, pyversion) + info = StateInfo(source.effective_path, source.module, [], manager) + initial_state = UnprocessedFile(info, content) + initial_states.append(initial_state) + + # Perform the build by sending the files as new file (UnprocessedFile is the + # initial state of all files) to the manager. The manager will process the + # file and all dependant modules recursively. 
+ result = manager.process(initial_states) + reports.finish() + manager.log("Build finished with %d modules and %d types" % + (len(result.files), len(result.types))) return result @@ -402,6 +406,7 @@ def __init__(self, data_dir: str, custom_typing_module: str, implicit_any: bool, reports: Reports) -> None: + self.start_time = time.time() self.data_dir = data_dir self.errors = Errors() self.errors.set_ignore_prefix(ignore_prefix) @@ -617,14 +622,14 @@ def maybe_make_cached_state(self, id: str, path: str) -> Optional['UnprocessedBa info = StateInfo(path, id, self.errors.import_context(), self) return ProbablyCachedFile(info, m) - def log(self, message: str) -> None: + def log(self, *message: str) -> None: if VERBOSE in self.flags: - print('LOG:', message, file=sys.stderr) + print('%.3f:LOG: ' % (time.time() - self.start_time), *message, file=sys.stderr) sys.stderr.flush() - def trace(self, message: str) -> None: + def trace(self, *message: str) -> None: if self.flags.count(VERBOSE) >= 2: - print('TRACE:', message, file=sys.stderr) + print('%.3f:TRACE:' % (time.time() - self.start_time), *message, file=sys.stderr) sys.stderr.flush() @@ -1399,7 +1404,7 @@ def get_cache_names(id: str, path: str, pyversion: Tuple[int, int]) -> Tuple[str def find_cache_meta(id: str, path: str, manager: BuildManager) -> Optional[CacheMeta]: meta_json, data_json = get_cache_names(id, path, manager.pyversion) - manager.log('Finding {} {}'.format(id, data_json)) + manager.trace('Looking for {} {}'.format(id, data_json)) if not os.path.exists(meta_json): return None with open(meta_json, 'r') as f: @@ -1425,7 +1430,7 @@ def find_cache_meta(id: str, path: str, manager: BuildManager) -> Optional[Cache # TODO: Share stat() outcome with find_module() st = os.stat(path) # TODO: Errors if st.st_mtime != m.mtime or st.st_size != m.size: - manager.log('Metadata abandoned because of modified file') + manager.log('Metadata abandoned because of modified file {}'.format(path)) return None # It's a match on (id, path, mtime, size). # Check data_json; assume if its mtime matches it's good. @@ -1453,7 +1458,7 @@ def dump_to_json(file: TypeCheckedFile, manager: BuildManager) -> None: def write_cache(id: str, path: str, tree: MypyFile, dependencies: List[str], manager: BuildManager) -> None: path = os.path.abspath(path) - manager.log('Dumping {} to {}'.format(id, path)) + manager.trace('Dumping {} {}'.format(id, path)) st = os.stat(path) # TODO: Errors mtime = st.st_mtime size = st.st_size From 1d6d92111bfaab8c1c783d715776ea899b86ca46 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 11 Mar 2016 10:30:12 -0800 Subject: [PATCH 062/117] Process nodes in an SCC in a deterministic order. The order is derived from the order in which modules and imports were encountered. Also treat parent packages as new roots rather than as dependencies. (It still remains to be seen if they are needed at all.) 
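A distilled sketch of the ordering heuristic introduced below, using a plain dict in place of State objects; order is the position in which a module was first encountered while building the graph:

    # Reverse encounter order, with builtins forced to the end of the SCC.
    order = {'builtins': 1, 'x': 2, 'x.y': 3, 'x.z': 4}
    ascc = {'x.y', 'x.z', 'builtins'}            # one strongly connected component

    scc = sorted(ascc, key=lambda id: -order[id])
    if 'builtins' in ascc:
        scc.remove('builtins')
        scc.append('builtins')
    assert scc == ['x.z', 'x.y', 'builtins']
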
--- mypy/depmgr.py | 90 +++++++++++++++++++++++++++++++++----------------- 1 file changed, 59 insertions(+), 31 deletions(-) diff --git a/mypy/depmgr.py b/mypy/depmgr.py index 79cb6e660ef2..0c2068b6b8a9 100644 --- a/mypy/depmgr.py +++ b/mypy/depmgr.py @@ -157,6 +157,8 @@ class State: """ manager = None # type: BuildManager + order_counter = 0 # Class variable + order = None # type: int # Order in which modules were encountered id = None # type: str # Fully qualified module name path = None # type: Optional[str] # Path to module source xpath = None # type: str # Path or '' @@ -164,7 +166,8 @@ class State: meta = None # type: Optional[CacheMeta] data = None # type: Optional[str] tree = None # type: Optional[MypyFile] - dependencies = None # type: Optional[Set[str]] + dependencies = None # type: Optional[List[str]] + roots = None # type: Optional[List[str]] def __init__(self, id: Optional[str], @@ -174,8 +177,12 @@ def __init__(self, ) -> None: assert id or path or source, "Neither id, path nor source given" self.manager = manager + State.order_counter += 1 + self.order = State.order_counter self.id = id or '__main__' if not path and not source: + # TODO: If PY2, replace builtins with __builtin__; + # see mypy.build.UnprocessedBase.import_module(). path = find_module(id, manager.lib_path) if not path: raise CompileError(["mypy: can't find module '%s'" % id]) @@ -185,16 +192,28 @@ def __init__(self, if path and INCREMENTAL in manager.flags: self.meta = find_cache_meta(self.id, self.path, manager) # TODO: Get mtime if not cached. + self.add_roots() if self.meta: - self.dependencies = set(self.meta.dependencies) + self.dependencies = self.meta.dependencies else: # Parse the file (and then some) to get the dependencies. self.parse_file() + def add_roots(self) -> None: + # All parent packages are new roots. + roots = [] + parent = self.id + while '.' in parent: + parent, _ = parent.rsplit('.', 1) + roots.append(parent) + self.roots = roots + def is_fresh(self) -> bool: + """Return whether the cache data for this file is fresh.""" return self.meta is not None def clear_fresh(self) -> None: + """Throw away the cache data for this file, marking it as stale.""" self.meta = None # Methods for processing cached modules. @@ -233,7 +252,10 @@ def parse_file(self) -> None: # TODO: This is weirdly optional; why is it needed? parent, child = self.id.rsplit('.', 1) if parent in modules: + manager.trace("Added %s.%s" % (parent, child)) modules[parent].names[child] = SymbolTableNode(MODULE_REF, self.tree, parent) + else: + manager.log("Hm... couldn't add %s.%s" % (parent, child)) # Do the first pass of semantic analysis: add top-level # definitions in the file to the symbol table. We must do @@ -246,18 +268,12 @@ def parse_file(self) -> None: # semantic analyzer. TODO: can't FirstPass .analyze() do this? self.tree.names = manager.semantic_analyzer.globals - # Compute dependencies. - dependencies = set() - # Start with parent packages. - parent = self.id - while '.' in parent: - parent, _ = parent.rsplit('.', 1) - dependencies.add(parent) - # Every module implicitly depends on builtins. - if self.id != 'builtins': - dependencies.add('builtins') + # Compute (direct) dependencies. # Add all direct imports (this is why we needed the first pass). - dependencies.update(id for id, _ in manager.all_imported_modules_in_file(self.tree)) + dependencies = [id for id, _ in manager.all_imported_modules_in_file(self.tree)] + # Every module implicitly depends on builtins. 
+ if self.id != 'builtins' and 'builtins' not in dependencies: + dependencies.append('builtins') # If self.dependencies is already set, it was read from the # cache, but for some reason we're re-parsing the file. @@ -330,6 +346,8 @@ def load_graph(sources: List[BuildSource], manager: BuildManager) -> Graph: for dep in st.dependencies: if dep not in graph and dep not in new: # TODO: Implement --silent-imports. + # TODO: Import context (see mypy.build.UnprocessedFile.process()). + # TODO: Error handling (ditto). depst = State(dep, None, None, manager) assert depst.id not in new, "TODO: This is bad %s" % depst.id new[depst.id] = depst @@ -340,29 +358,42 @@ def load_graph(sources: List[BuildSource], manager: BuildManager) -> Graph: def process_graph(graph: Graph, manager: BuildManager) -> None: - """Process everyhing in dependency order.""" + """Process everything in dependency order.""" sccs = sorted_components(graph) - manager.log("Found %d SCCs" % len(sccs)) - for scc in sccs: - manager.trace("Processing SCC of size %d (%s)" % (len(scc), " ".join(sorted(scc)))) + manager.log("Found %d SCCs; largest has %d nodes" % + (len(sccs), max(len(scc) for scc in sccs))) + for ascc in sccs: + # Sort the SCC's nodes in *reverse* order or encounter. + # This is a heuristic for handling import cycles. + # Note that ascc is a set, and scc is a list. + scc = sorted(ascc, key=lambda id: -graph[id].order) + # If builtins is in the list, move it last. + if 'builtins' in ascc: + scc.remove('builtins') + scc.append('builtins') # TODO: Do something about mtime ordering. - fresh = all(graph[id].is_fresh() for id in scc) + fresh = scc_is_fresh = all(graph[id].is_fresh() for id in scc) if fresh: - manager.trace(" Looks fresh...") deps = set() for id in scc: deps.update(graph[id].dependencies) - deps -= scc + deps -= ascc fresh = all(graph[id].is_fresh() for id in deps) if fresh: - manager.trace(" Processing as fresh") + fresh_msg = "fresh" + elif scc_is_fresh: + fresh_msg = "stale due to stale deps" + else: + fresh_msg = "stale" + manager.log("Processing SCC of size %d as %s (%s)" % + (len(scc), fresh_msg, " ".join(scc))) + if fresh: process_fresh_scc(graph, scc) else: - manager.trace(" *** Processing as stale ***") process_stale_scc(graph, scc) -def process_fresh_scc(graph: Graph, scc: AbstractSet[str]) -> None: +def process_fresh_scc(graph: Graph, scc: List[str]) -> None: """Process the modules in one SCC from their cached data.""" for id in scc: graph[id].load_tree() @@ -372,17 +403,13 @@ def process_fresh_scc(graph: Graph, scc: AbstractSet[str]) -> None: graph[id].calculate_mros() -def process_stale_scc(graph: Graph, ascc: AbstractSet[str]) -> None: +def process_stale_scc(graph: Graph, scc: List[str]) -> None: """Process the modules in one SCC from source code.""" - if ascc == {'abc', 'typing', 'builtins'}: - # Hack: typing must be processed before builtins. TODO: Why? - scc = ['abc', 'typing', 'builtins'] - else: - scc = sorted(ascc) # Sort for reproducibility. TODO: Why? for id in scc: graph[id].clear_fresh() for id in scc: # We may already have parsed the module, or not. + # If the former, parse_file() is a no-op. graph[id].parse_file() for id in scc: graph[id].semantic_analysis() @@ -413,7 +440,7 @@ def sorted_components(graph: Graph) -> List[AbstractSet[str]]: # TODO: Use TypeVar T instead of str. 
def strongly_connected_components_path(vertices: Set[str], - edges: Dict[str, Set[str]]) -> Iterator[Set[str]]: + edges: Dict[str, List[str]]) -> Iterator[Set[str]]: """Compute Strongly Connected Components of a graph. From http://code.activestate.com/recipes/578507/. @@ -466,7 +493,8 @@ def topsort(data: Dict[AbstractSet[str], Set[AbstractSet[str]]]) -> Iterable[Abs ready = {item for item, dep in data.items() if not dep} if not ready: break - # TODO: Return the items in a reproducible order. + # TODO: Return the items in a reproducible order, or return + # the entire set of items. for item in ready: yield item data = {item: (dep - ready) From fb24521b11b112f5ee05d38a4d7b32069c64f02d Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 11 Mar 2016 11:14:05 -0800 Subject: [PATCH 063/117] Satisfy flake8. Warn instead of die on missing module ref. --- mypy/nodes.py | 7 +++++-- mypy/types.py | 3 ++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/mypy/nodes.py b/mypy/nodes.py index fc67dd1da33f..1482881ff4a8 100644 --- a/mypy/nodes.py +++ b/mypy/nodes.py @@ -1977,7 +1977,10 @@ def serialize(self, prefix: str, name: str) -> JsonDict: if not self.module_public: data['module_public'] = False if self.kind == MODULE_REF: - data['cross_ref'] = self.node.fullname() + if self.node is None: + print("*** Missing module cross ref in %s for %s" % (prefix, name)) + else: + data['cross_ref'] = self.node.fullname() else: if self.node is not None: if prefix is not None: @@ -1986,7 +1989,7 @@ def serialize(self, prefix: str, name: str) -> JsonDict: else: fullname = self.node.fullname() if (fullname is not None and '.' in fullname and - fullname != prefix + '.' + name): + fullname != prefix + '.' + name): data['cross_ref'] = fullname return data data['node'] = self.node.serialize() diff --git a/mypy/types.py b/mypy/types.py index b84ebe1b6133..c114c001e30f 100644 --- a/mypy/types.py +++ b/mypy/types.py @@ -85,7 +85,8 @@ def deserialize(cls, data: JsonDict) -> 'TypeVarDef': assert data['.class'] == 'TypeVarDef' return TypeVarDef(data['name'], data['id'], - None if data['values'] is None else [Type.deserialize(v) for v in data['values']], + None if data['values'] is None + else [Type.deserialize(v) for v in data['values']], Type.deserialize(data['upper_bound']), data['variance'], ) From 6094ddb606be7a67e0167c3e194e301dfdf877c5 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 11 Mar 2016 11:40:07 -0800 Subject: [PATCH 064/117] Check for errors at various points. --- mypy/depmgr.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/mypy/depmgr.py b/mypy/depmgr.py index 0c2068b6b8a9..decbfe5876c7 100644 --- a/mypy/depmgr.py +++ b/mypy/depmgr.py @@ -216,6 +216,12 @@ def clear_fresh(self) -> None: """Throw away the cache data for this file, marking it as stale.""" self.meta = None + def check_blockers(self) -> None: + """Raise CompileError if a blocking error is detected.""" + if self.manager.errors.is_blockers(): + self.manager.log("Bailing due to blocking errors") + self.manager.errors.raise_error() + # Methods for processing cached modules. def load_tree(self) -> None: @@ -265,7 +271,8 @@ def parse_file(self) -> None: first.analyze(self.tree, self.xpath, self.id) # Initialize module symbol table, which was populated by the - # semantic analyzer. TODO: can't FirstPass .analyze() do this? + # semantic analyzer. + # TODO: Why can't FirstPass .analyze() do this? self.tree.names = manager.semantic_analyzer.globals # Compute (direct) dependencies. 
@@ -285,17 +292,21 @@ def parse_file(self) -> None: print(" Cached:", self.dependencies) print(" Source:", dependencies) self.dependencies = dependencies + self.check_blockers() def semantic_analysis(self) -> None: self.manager.semantic_analyzer.visit_file(self.tree, self.xpath) + self.check_blockers() def semantic_analysis_pass_three(self) -> None: self.manager.semantic_analyzer_pass3.visit_file(self.tree, self.xpath) # TODO: DUMP_TYPE_STATS + self.check_blockers() def type_check(self) -> None: self.manager.type_checker.visit_file(self.tree, self.xpath) # TODO: DUMP_INFER_STATS, manager.reports.file() + self.check_blockers() def write_cache(self) -> None: if self.path and INCREMENTAL in self.manager.flags: @@ -317,6 +328,7 @@ def parse_file(id: str, path: str, source: str, manager: BuildManager) -> MypyFi fast_parser=FAST_PARSER in manager.flags) tree._fullname = id if errors.num_messages() != num_errs: + manager.log("Bailing due to parse errors") errors.raise_error() return tree @@ -329,6 +341,9 @@ def dispatch(sources: List[BuildSource], manager: BuildManager) -> None: graph = load_graph(sources, manager) manager.log("Loaded graph with %d nodes" % len(graph)) process_graph(graph, manager) + if manager.errors.is_errors(): + manager.log("Found %d errors (before de-duping)" % manager.errors.num_messages()) + manager.errors.raise_error() def load_graph(sources: List[BuildSource], manager: BuildManager) -> Graph: @@ -420,7 +435,6 @@ def process_stale_scc(graph: Graph, scc: List[str]) -> None: graph[id].write_cache() -# TODO: Use TypeVar T instead of str. def sorted_components(graph: Graph) -> List[AbstractSet[str]]: """Return the graph's SCCs, topologically sorted by dependencies.""" # Compute SCCs. @@ -438,7 +452,6 @@ def sorted_components(graph: Graph) -> List[AbstractSet[str]]: return list(topsort(data)) -# TODO: Use TypeVar T instead of str. def strongly_connected_components_path(vertices: Set[str], edges: Dict[str, List[str]]) -> Iterator[Set[str]]: """Compute Strongly Connected Components of a graph. @@ -478,7 +491,6 @@ def dfs(v: str) -> Iterator[Set[str]]: yield scc -# TODO: Use TypeVar T instead of str. def topsort(data: Dict[AbstractSet[str], Set[AbstractSet[str]]]) -> Iterable[AbstractSet[str]]: """Topological sort. Consumes its argument. From 6d065fdbe01c06a6d10d9112bdc2d9e9488df163 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 11 Mar 2016 11:49:56 -0800 Subject: [PATCH 065/117] Add -i as shortcut for --incremental. Pass -i -i to use new depmgr. 
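Roughly, each -i on the command line appends one INCREMENTAL entry to the build flags, and the new dependency manager is only selected when at least two are present. A minimal model of that gating (the real code uses build.INCREMENTAL and the argument loop in mypy/main.py):

    INCREMENTAL = 'incremental'   # stand-in for build.INCREMENTAL

    def collect_flags(args):
        return [INCREMENTAL for a in args if a in ('-i', '--incremental')]

    assert collect_flags(['-i', 'prog.py']).count(INCREMENTAL) == 1        # old incremental path
    assert collect_flags(['-i', '-i', 'prog.py']).count(INCREMENTAL) >= 2  # new dependency manager
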
--- mypy/build.py | 2 +- mypy/main.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index 6ae87013d13b..e0d2fce0680c 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -209,7 +209,7 @@ def build(sources: List[BuildSource], implicit_any=implicit_any, reports=reports) - if INCREMENTAL in flags and VERBOSE in flags: + if flags.count(INCREMENTAL) >= 2: from .depmgr import dispatch dispatch(sources, manager) result = BuildResult(manager.modules, manager.type_checker.type_map) diff --git a/mypy/main.py b/mypy/main.py index a7524d67eb44..83df106b2575 100644 --- a/mypy/main.py +++ b/mypy/main.py @@ -181,7 +181,7 @@ def process_options(args: List[str]) -> Tuple[List[BuildSource], Options]: elif args[0] == '--disallow-untyped-calls': options.build_flags.append(build.DISALLOW_UNTYPED_CALLS) args = args[1:] - elif args[0] == '--incremental': + elif args[0] in ('--incremental', '-i'): options.build_flags.append(build.INCREMENTAL) args = args[1:] elif args[0] in ('--version', '-V'): @@ -319,7 +319,7 @@ def usage(msg: str = None) -> None: -s, --silent-imports don't follow imports to .py files --disallow-untyped-calls disallow calling functions without type annotations from functions with type annotations - --incremental incremental mode: cache type-checking results + -i, --incremental incremental mode: cache type-checking results --implicit-any behave as though all functions were annotated with Any -f, --dirty-stubs don't warn if typeshed is out of sync --pdb invoke pdb on fatal error From 032f8d5c191378490e3fbe751729b0ce138516f3 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 11 Mar 2016 12:18:31 -0800 Subject: [PATCH 066/117] Don't write cache if we have errors. Improve fresh/stale logging. --- mypy/depmgr.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/mypy/depmgr.py b/mypy/depmgr.py index decbfe5876c7..1d683ba8572e 100644 --- a/mypy/depmgr.py +++ b/mypy/depmgr.py @@ -309,7 +309,7 @@ def type_check(self) -> None: self.check_blockers() def write_cache(self) -> None: - if self.path and INCREMENTAL in self.manager.flags: + if self.path and INCREMENTAL in self.manager.flags and not self.manager.errors.is_errors(): write_cache(self.id, self.path, self.tree, list(self.dependencies), self.manager) @@ -387,21 +387,25 @@ def process_graph(graph: Graph, manager: BuildManager) -> None: scc.remove('builtins') scc.append('builtins') # TODO: Do something about mtime ordering. - fresh = scc_is_fresh = all(graph[id].is_fresh() for id in scc) + stale_scc = {id for id in scc if not graph[id].is_fresh()} + fresh = not stale_scc if fresh: deps = set() for id in scc: deps.update(graph[id].dependencies) deps -= ascc - fresh = all(graph[id].is_fresh() for id in deps) + stale_deps = {id for id in deps if not graph[id].is_fresh()} + fresh = not stale_deps + else: + stale_deps = {} # Shouldn't be needed. 
if fresh: fresh_msg = "fresh" - elif scc_is_fresh: - fresh_msg = "stale due to stale deps" + elif stale_scc: + fresh_msg = "inherently stale (%s)" % " ".join(sorted(stale_scc)) else: - fresh_msg = "stale" - manager.log("Processing SCC of size %d as %s (%s)" % - (len(scc), fresh_msg, " ".join(scc))) + fresh_msg = "stale due to deps (%s)" % " ".join(sorted(stale_deps)) + manager.log("Processing SCC of size %d (%s) as %s" % + (len(scc), " ".join(scc), fresh_msg)) if fresh: process_fresh_scc(graph, scc) else: From 2258ccd94198bc451df85cf057f832b8f814bb3d Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 11 Mar 2016 12:32:21 -0800 Subject: [PATCH 067/117] Deal with __builtin__/builtins in Python 2 mode. --- mypy/depmgr.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/mypy/depmgr.py b/mypy/depmgr.py index 1d683ba8572e..e47f81c66b5f 100644 --- a/mypy/depmgr.py +++ b/mypy/depmgr.py @@ -181,9 +181,18 @@ def __init__(self, self.order = State.order_counter self.id = id or '__main__' if not path and not source: - # TODO: If PY2, replace builtins with __builtin__; - # see mypy.build.UnprocessedBase.import_module(). - path = find_module(id, manager.lib_path) + file_id = id + if id == 'builtins' and manager.pyversion[0] == 2: + # The __builtin__ module is called internally by mypy + # 'builtins' in Python 2 mode (similar to Python 3), + # but the stub file is __builtin__.pyi. The reason is + # that a lot of code hard-codes 'builtins.x' and it's + # easier to work it around like this. It also means + # that the implementation can mostly ignore the + # difference and just assume 'builtins' everywhere, + # which simplifies code. + file_id = '__builtin__' + path = find_module(file_id, manager.lib_path) if not path: raise CompileError(["mypy: can't find module '%s'" % id]) self.path = path From d41e7d4bd0a83b3ce94b5d2901bb33212190a9e2 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 11 Mar 2016 12:39:40 -0800 Subject: [PATCH 068/117] Improve fresh/stale logging. Switch to absolute imports. 
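After this change the log classifies every SCC as fresh, inherently stale, or stale due to dependencies. A condensed sketch of that classification, with a plain dict standing in for State.is_fresh() and toy inputs for the SCC and its dependencies:

    def fresh_message(ascc, all_deps, is_fresh):
        # Mirrors the fresh/stale message logic in the diff below.
        stale_scc = {id for id in ascc if not is_fresh[id]}
        stale_deps = {id for id in all_deps - ascc if not is_fresh[id]}
        if not stale_scc and not stale_deps:
            return "fresh"
        if stale_scc:
            msg = "inherently stale (%s)" % " ".join(sorted(stale_scc))
            if stale_deps:
                msg += " with stale deps (%s)" % " ".join(sorted(stale_deps))
            return msg
        return "stale due to deps (%s)" % " ".join(sorted(stale_deps))
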
--- mypy/build.py | 2 +- mypy/depmgr.py | 29 ++++++++++++++--------------- 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index e0d2fce0680c..7e39948126e9 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -210,7 +210,7 @@ def build(sources: List[BuildSource], reports=reports) if flags.count(INCREMENTAL) >= 2: - from .depmgr import dispatch + from mypy.depmgr import dispatch dispatch(sources, manager) result = BuildResult(manager.modules, manager.type_checker.type_map) else: diff --git a/mypy/depmgr.py b/mypy/depmgr.py index e47f81c66b5f..0c83ae4bcf90 100644 --- a/mypy/depmgr.py +++ b/mypy/depmgr.py @@ -136,15 +136,15 @@ from typing import Any, Dict, List, Set, AbstractSet, Iterable, Iterator, Optional, TypeVar -from .build import (BuildManager, BuildSource, CacheMeta, +from mypy.build import (BuildManager, BuildSource, CacheMeta, INCREMENTAL, FAST_PARSER, SILENT_IMPORTS, find_cache_meta, find_module, read_with_python_encoding, write_cache) -from .errors import CompileError -from .fixup import fixup_module_pass_one, fixup_module_pass_two -from .nodes import MypyFile, SymbolTableNode, MODULE_REF -from .parse import parse -from .semanal import FirstPass +from mypy.errors import CompileError +from mypy.fixup import fixup_module_pass_one, fixup_module_pass_two +from mypy.nodes import MypyFile, SymbolTableNode, MODULE_REF +from mypy.parse import parse +from mypy.semanal import FirstPass class State: @@ -398,19 +398,18 @@ def process_graph(graph: Graph, manager: BuildManager) -> None: # TODO: Do something about mtime ordering. stale_scc = {id for id in scc if not graph[id].is_fresh()} fresh = not stale_scc - if fresh: - deps = set() - for id in scc: - deps.update(graph[id].dependencies) - deps -= ascc - stale_deps = {id for id in deps if not graph[id].is_fresh()} - fresh = not stale_deps - else: - stale_deps = {} # Shouldn't be needed. + deps = set() + for id in scc: + deps.update(graph[id].dependencies) + deps -= ascc + stale_deps = {id for id in deps if not graph[id].is_fresh()} + fresh = fresh and not stale_deps if fresh: fresh_msg = "fresh" elif stale_scc: fresh_msg = "inherently stale (%s)" % " ".join(sorted(stale_scc)) + if stale_deps: + fresh_msg += " with stale deps (%s)" % " ".join(sorted(stale_deps)) else: fresh_msg = "stale due to deps (%s)" % " ".join(sorted(stale_deps)) manager.log("Processing SCC of size %d (%s) as %s" % From a6f0cfc8b705d499940d544d388efc3080fb45a0 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 11 Mar 2016 12:54:33 -0800 Subject: [PATCH 069/117] Honor manager.target field. 
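The target check added below is just an early return: manager.target is an integer build stage, and anything lower than TYPE_CHECK skips the checker. A minimal sketch (TYPE_CHECK is imported from mypy.build in the diff; the numeric value here is only illustrative):

    TYPE_CHECK = 3   # assumed stage value, for illustration

    def maybe_type_check(target, check):
        if target < TYPE_CHECK:
            return    # e.g. a semantic-analysis-only build stops before type checking
        check()
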
--- mypy/depmgr.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mypy/depmgr.py b/mypy/depmgr.py index 0c83ae4bcf90..6e8b8ff021c0 100644 --- a/mypy/depmgr.py +++ b/mypy/depmgr.py @@ -137,7 +137,7 @@ from typing import Any, Dict, List, Set, AbstractSet, Iterable, Iterator, Optional, TypeVar from mypy.build import (BuildManager, BuildSource, CacheMeta, - INCREMENTAL, FAST_PARSER, SILENT_IMPORTS, + INCREMENTAL, FAST_PARSER, SILENT_IMPORTS, TYPE_CHECK, find_cache_meta, find_module, read_with_python_encoding, write_cache) from mypy.errors import CompileError @@ -313,6 +313,8 @@ def semantic_analysis_pass_three(self) -> None: self.check_blockers() def type_check(self) -> None: + if self.manager.target < TYPE_CHECK: + return self.manager.type_checker.visit_file(self.tree, self.xpath) # TODO: DUMP_INFER_STATS, manager.reports.file() self.check_blockers() From 209c321930f68af4b6f1a65a59427bd360ec51ed Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 11 Mar 2016 13:12:43 -0800 Subject: [PATCH 070/117] Some updates to module docstring. --- mypy/depmgr.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/mypy/depmgr.py b/mypy/depmgr.py index 6e8b8ff021c0..a9afa9cf5367 100644 --- a/mypy/depmgr.py +++ b/mypy/depmgr.py @@ -114,10 +114,11 @@ be done for all nodes before starting the next pass for any nodes in the SCC.) -We could process the nodes in the SCC in any order. We *might* -process them in the reverse order in which we encountered them when -originally constructing the graph (IIUC that's how the old build.py -deals with cycles). For now we'll process them in alphabetical order. +We could process the nodes in the SCC in any order. For sentimental +reasons, I've decided to process them in the reverse order in which we +encountered them when originally constructing the graph. That's how +the old build.py deals with cycles, and at least this reproduces the +previous implementation more accurately. Can we do better than re-parsing all nodes in the SCC when any of its dependencies are out of date? It's doubtful. The optimization @@ -129,6 +130,14 @@ we have a cycle of over 500 modules in the server repo. But I'd like to deal with it later.) +Additional wrinkles +------------------- + +During implementation more wrinkles were found. + +- When a submodule of a package (e.g. x.y) is encountered, the parent + package (e.g. x) must also be loaded, but it is not strictly a + dependency. See State.add_roots() below. """ import json From 1eba74d33bf3f3864b424060b400967e3aacc646 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 11 Mar 2016 14:13:44 -0800 Subject: [PATCH 071/117] Move module_not_found() into BuildManager. 
--- mypy/build.py | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index 7e39948126e9..be70dbe9040d 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -613,6 +613,22 @@ def is_module(self, id: str) -> bool: """Is there a file in the file system corresponding to module id?""" return find_module(id, self.lib_path) is not None + def module_not_found(self, path: str, line: int, id: str) -> None: + self.errors.set_file(path) + stub_msg = "(Stub files are from https://github.com/python/typeshed)" + if ((self.pyversion[0] == 2 and moduleinfo.is_py2_std_lib_module(id)) or + (self.pyversion[0] >= 3 and moduleinfo.is_py3_std_lib_module(id))): + self.errors.report( + line, "No library stub file for standard library module '{}'".format(id)) + self.errors.report(line, stub_msg, severity='note', only_once=True) + elif moduleinfo.is_third_party_module(id): + self.errors.report(line, "No library stub file for module '{}'".format(id)) + self.errors.report(line, stub_msg, severity='note', only_once=True) + else: + self.errors.report(line, "Cannot find module named '{}'".format(id)) + self.errors.report(line, "(Perhaps setting MYPYPATH would help)", severity='note', + only_once=True) + def maybe_make_cached_state(self, id: str, path: str) -> Optional['UnprocessedBase']: if INCREMENTAL not in self.flags: return None @@ -760,22 +776,6 @@ def fail(self, path: str, line: int, msg: str, blocker: bool = True) -> None: self.errors().set_file(path) self.errors().report(line, msg, blocker=blocker) - def module_not_found(self, path: str, line: int, id: str) -> None: - self.errors().set_file(path) - stub_msg = "(Stub files are from https://github.com/python/typeshed)" - if ((self.manager.pyversion[0] == 2 and moduleinfo.is_py2_std_lib_module(id)) or - (self.manager.pyversion[0] >= 3 and moduleinfo.is_py3_std_lib_module(id))): - self.errors().report( - line, "No library stub file for standard library module '{}'".format(id)) - self.errors().report(line, stub_msg, severity='note', only_once=True) - elif moduleinfo.is_third_party_module(id): - self.errors().report(line, "No library stub file for module '{}'".format(id)) - self.errors().report(line, stub_msg, severity='note', only_once=True) - else: - self.errors().report(line, "Cannot find module named '{}'".format(id)) - self.errors().report(line, "(Perhaps setting MYPYPATH would help)", severity='note', - only_once=True) - class UnprocessedBase(State): def __init__(self, info: StateInfo) -> None: @@ -1030,7 +1030,7 @@ def load_dependencies(self): if self.silent: self.manager.missing_modules.add(p) else: - self.module_not_found(self.path, 1, p) + self.manager.module_not_found(self.path, 1, p) else: self.dependencies.append(p) @@ -1088,7 +1088,7 @@ def process(self) -> None: if (line not in tree.ignored_lines and 'import' not in tree.weak_opts and not self.silent): - self.module_not_found(self.path, line, id) + self.manager.module_not_found(self.path, line, id) self.manager.missing_modules.add(id) # Replace this state object with a parsed state in BuildManager. From 24c34e96d4c51877916d35696f150fc4352552cc Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 11 Mar 2016 14:18:09 -0800 Subject: [PATCH 072/117] Improve error handling around module loading. On bad imports honor --silent, # type: ignore, and import weak opt. Support NEWIMPORT=1 to force using the new dependency manager. 
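The suppression rule in the diff below only reports a missing import when none of the silencing mechanisms apply. A distilled sketch of that condition, with simple arguments standing in for the BuildManager flags and MypyFile attributes used in the real code:

    def should_report_missing_import(flags, ignored_lines, weak_opts, caller_line):
        if 'silent-imports' in flags:        # --silent-imports / -s (SILENT_IMPORTS flag)
            return False
        if caller_line in ignored_lines:     # "# type: ignore" on the import line
            return False
        if 'import' in weak_opts:            # per-file weak import option
            return False
        return True
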
--- mypy/build.py | 2 +- mypy/depmgr.py | 131 +++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 106 insertions(+), 27 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index be70dbe9040d..cbc16a5840ac 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -209,7 +209,7 @@ def build(sources: List[BuildSource], implicit_any=implicit_any, reports=reports) - if flags.count(INCREMENTAL) >= 2: + if flags.count(INCREMENTAL) >= 2 or os.getenv("NEWINCREMENTAL"): from mypy.depmgr import dispatch dispatch(sources, manager) result = BuildResult(manager.modules, manager.type_checker.type_map) diff --git a/mypy/depmgr.py b/mypy/depmgr.py index a9afa9cf5367..45a9e7955a8e 100644 --- a/mypy/depmgr.py +++ b/mypy/depmgr.py @@ -140,10 +140,11 @@ dependency. See State.add_roots() below. """ +import contextlib import json import os -from typing import Any, Dict, List, Set, AbstractSet, Iterable, Iterator, Optional, TypeVar +from typing import Any, Dict, List, Set, AbstractSet, Tuple, Iterable, Iterator, Optional, TypeVar from mypy.build import (BuildManager, BuildSource, CacheMeta, INCREMENTAL, FAST_PARSER, SILENT_IMPORTS, TYPE_CHECK, @@ -156,6 +157,10 @@ from mypy.semanal import FirstPass +class ModuleNotFound(Exception): + """Control flow exception to signal that a module was not found.""" + + class State: """The state for a module. @@ -175,21 +180,32 @@ class State: meta = None # type: Optional[CacheMeta] data = None # type: Optional[str] tree = None # type: Optional[MypyFile] - dependencies = None # type: Optional[List[str]] + dependencies = None # type: List[str] + dep_line_map = None # tyoe: Dict[str, int] # Line number where imported roots = None # type: Optional[List[str]] + import_context = None # type: List[Tuple[str, int]] + imported_from = None # type: Optional[Tuple[State, int]] def __init__(self, id: Optional[str], path: Optional[str], source: Optional[str], manager: BuildManager, + imported_from: Tuple['State', int] = None, ) -> None: - assert id or path or source, "Neither id, path nor source given" + assert id or path or source is not None, "Neither id, path nor source given" self.manager = manager State.order_counter += 1 self.order = State.order_counter + self.imported_from = imported_from + if imported_from: + caller_state, caller_line = imported_from + self.import_context = caller_state.import_context[:] + self.import_context.append((caller_state.xpath, caller_line)) + else: + self.import_context = [] self.id = id or '__main__' - if not path and not source: + if not path and source is None: file_id = id if id == 'builtins' and manager.pyversion[0] == 2: # The __builtin__ module is called internally by mypy @@ -203,22 +219,44 @@ def __init__(self, file_id = '__builtin__' path = find_module(file_id, manager.lib_path) if not path: - raise CompileError(["mypy: can't find module '%s'" % id]) + # Could not find a module. Typically the reason is a + # misspelled module name, missing stub, module not in + # search path or the module has not been installed. 
+ if self.imported_from: + caller_state, caller_line = self.imported_from + if not (SILENT_IMPORTS in manager.flags or + (caller_state.tree is not None and + (caller_line in caller_state.tree.ignored_lines or + 'import' in caller_state.tree.weak_opts))): + ##print(caller_state.import_context, self.import_context, caller_state.xpath, self.xpath, caller_state.id) + save_import_context = manager.errors.import_context() + manager.errors.set_import_context(caller_state.import_context) + manager.module_not_found(caller_state.xpath, caller_line, id) + manager.errors.set_import_context(save_import_context) + manager.missing_modules.add(id) + raise ModuleNotFound + else: + # If this is a root it's always fatal. + # TODO: This might hide non-fatal errors from + # roots processed earlier. + raise CompileError(["mypy: can't find module '%s'" % id]) self.path = path self.xpath = path or '' self.source = source - if path and INCREMENTAL in manager.flags: + if path and source is None and INCREMENTAL in manager.flags: self.meta = find_cache_meta(self.id, self.path, manager) # TODO: Get mtime if not cached. self.add_roots() if self.meta: self.dependencies = self.meta.dependencies + self.dep_line_map = {} else: # Parse the file (and then some) to get the dependencies. self.parse_file() def add_roots(self) -> None: # All parent packages are new roots. + # TODO: Use build.super_packages()? roots = [] parent = self.id while '.' in parent: @@ -240,6 +278,14 @@ def check_blockers(self) -> None: self.manager.log("Bailing due to blocking errors") self.manager.errors.raise_error() + @contextlib.contextmanager + def wrap_context(self) -> Iterator[None]: + save_import_context = self.manager.errors.import_context() + self.manager.errors.set_import_context(self.import_context) + yield + self.manager.errors.set_import_context(save_import_context) + self.check_blockers() + # Methods for processing cached modules. def load_tree(self) -> None: @@ -265,10 +311,20 @@ def parse_file(self) -> None: manager = self.manager modules = manager.modules - if not self.source: - self.source = read_with_python_encoding(self.path, manager.pyversion) - self.tree = parse_file(self.id, self.path, self.source, manager) - self.source = None # We won't need it again. + with self.wrap_context(): + source = self.source + self.source = None # We won't need it again. + if self.path and source is None: + try: + source = read_with_python_encoding(self.path, manager.pyversion) + except IOError as ioerr: + raise CompileError([ + "mypy: can't read file '{}': {}".format(self.path, ioerr.strerror)]) + except UnicodeDecodeError as decodeerr: + raise CompileError([ + "mypy: can't decode file '{}': {}".format(self.path, str(decodeerr))]) + self.tree = parse_file(self.id, self.path, source, manager) + modules[self.id] = self.tree if self.tree and '.' in self.id: @@ -295,7 +351,17 @@ def parse_file(self) -> None: # Compute (direct) dependencies. # Add all direct imports (this is why we needed the first pass). - dependencies = [id for id, _ in manager.all_imported_modules_in_file(self.tree)] + # Also keep track of each dependency's source line. + dependencies = [] + dep_line_map = {} + for id, line in manager.all_imported_modules_in_file(self.tree): + # Omit missing modules, as otherwise we could not type-check + # programs with missing modules. + if id == self.id or id in manager.missing_modules or not id: + continue + if id not in dep_line_map: + dependencies.append(id) + dep_line_map[id] = line # Every module implicitly depends on builtins. 
if self.id != 'builtins' and 'builtins' not in dependencies: dependencies.append('builtins') @@ -310,23 +376,24 @@ def parse_file(self) -> None: print(" Cached:", self.dependencies) print(" Source:", dependencies) self.dependencies = dependencies + self.dep_line_map = dep_line_map self.check_blockers() def semantic_analysis(self) -> None: - self.manager.semantic_analyzer.visit_file(self.tree, self.xpath) - self.check_blockers() + with self.wrap_context(): + self.manager.semantic_analyzer.visit_file(self.tree, self.xpath) def semantic_analysis_pass_three(self) -> None: - self.manager.semantic_analyzer_pass3.visit_file(self.tree, self.xpath) - # TODO: DUMP_TYPE_STATS - self.check_blockers() + with self.wrap_context(): + self.manager.semantic_analyzer_pass3.visit_file(self.tree, self.xpath) + # TODO: DUMP_TYPE_STATS def type_check(self) -> None: if self.manager.target < TYPE_CHECK: return - self.manager.type_checker.visit_file(self.tree, self.xpath) - # TODO: DUMP_INFER_STATS, manager.reports.file() - self.check_blockers() + with self.wrap_context(): + self.manager.type_checker.visit_file(self.tree, self.xpath) + # TODO: DUMP_INFER_STATS, manager.reports.file() def write_cache(self) -> None: if self.path and INCREMENTAL in self.manager.flags and not self.manager.errors.is_errors(): @@ -371,19 +438,31 @@ def load_graph(sources: List[BuildSource], manager: BuildManager) -> Graph: graph = {} # type: Graph # Seed graph with roots. for bs in sources: - st = State(bs.module, bs.path, bs.text, manager) - assert st.id not in graph, "TODO: Duplicate module %s" % st.id + try: + st = State(bs.module, bs.path, bs.text, manager) + except ModuleNotFound: + continue + if st.id in graph: + manager.errors.set_file(st.xpath) + manager.errors.report(1, "Duplicate module named '%s'" % st.id) + manager.errors.raise_error() graph[st.id] = st # Collect dependencies. while True: new = {} # type: Graph for st in graph.values(): - for dep in st.dependencies: + for dep in st.roots + st.dependencies: if dep not in graph and dep not in new: - # TODO: Implement --silent-imports. - # TODO: Import context (see mypy.build.UnprocessedFile.process()). - # TODO: Error handling (ditto). - depst = State(dep, None, None, manager) + imported_from = None + if dep in st.dependencies: + imported_from = (st, st.dep_line_map.get(dep, 1)) + assert dep, (st.roots, st.dependencies, st.id) + try: + depst = State(dep, None, None, manager, imported_from=imported_from) + except ModuleNotFound: + if dep in st.dependencies: + st.dependencies.remove(dep) + continue assert depst.id not in new, "TODO: This is bad %s" % depst.id new[depst.id] = depst if not new: From 85bb6af9d0004f506a7dfc8ab76eeb0a5ed1c8d2 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Sat, 12 Mar 2016 08:20:45 -0800 Subject: [PATCH 073/117] Reworked dependency collection and import context management. --- mypy/depmgr.py | 58 ++++++++++++++++++++++++++------------------------ 1 file changed, 30 insertions(+), 28 deletions(-) diff --git a/mypy/depmgr.py b/mypy/depmgr.py index 45a9e7955a8e..348a861ea719 100644 --- a/mypy/depmgr.py +++ b/mypy/depmgr.py @@ -140,6 +140,7 @@ dependency. See State.add_roots() below. 
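  For illustration (a standalone sketch, not code from this patch): the
  extra roots for a dotted module id are just its ancestor packages,
  nearest parent first, which is what add_roots() computes.

      from typing import List

      def ancestor_packages(id: str) -> List[str]:
          # 'a.b.c' -> ['a.b', 'a']
          parents = []
          while '.' in id:
              id = id.rsplit('.', 1)[0]
              parents.append(id)
          return parents

      assert ancestor_packages('os.path') == ['os']
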
""" +import collections import contextlib import json import os @@ -184,22 +185,24 @@ class State: dep_line_map = None # tyoe: Dict[str, int] # Line number where imported roots = None # type: Optional[List[str]] import_context = None # type: List[Tuple[str, int]] - imported_from = None # type: Optional[Tuple[State, int]] + caller_state = None # type: Optional[State] + caller_line = 0 def __init__(self, id: Optional[str], path: Optional[str], source: Optional[str], manager: BuildManager, - imported_from: Tuple['State', int] = None, + caller_state: 'State' = None, + caller_line: int = 0, ) -> None: assert id or path or source is not None, "Neither id, path nor source given" self.manager = manager State.order_counter += 1 self.order = State.order_counter - self.imported_from = imported_from - if imported_from: - caller_state, caller_line = imported_from + self.caller_state = caller_state + self.caller_line = caller_line + if caller_state: self.import_context = caller_state.import_context[:] self.import_context.append((caller_state.xpath, caller_line)) else: @@ -222,13 +225,12 @@ def __init__(self, # Could not find a module. Typically the reason is a # misspelled module name, missing stub, module not in # search path or the module has not been installed. - if self.imported_from: - caller_state, caller_line = self.imported_from + # TODO: Copy the check for id == '' from build.py? + if self.caller_state: if not (SILENT_IMPORTS in manager.flags or (caller_state.tree is not None and (caller_line in caller_state.tree.ignored_lines or 'import' in caller_state.tree.weak_opts))): - ##print(caller_state.import_context, self.import_context, caller_state.xpath, self.xpath, caller_state.id) save_import_context = manager.errors.import_context() manager.errors.set_import_context(caller_state.import_context) manager.module_not_found(caller_state.xpath, caller_line, id) @@ -436,6 +438,8 @@ def dispatch(sources: List[BuildSource], manager: BuildManager) -> None: def load_graph(sources: List[BuildSource], manager: BuildManager) -> Graph: """Given some source files, load the full dependency graph.""" graph = {} # type: Graph + # The deque is used to implement breadth first traversal. + new = collections.deque() # type: collections.deque[State] # Seed graph with roots. for bs in sources: try: @@ -447,27 +451,25 @@ def load_graph(sources: List[BuildSource], manager: BuildManager) -> Graph: manager.errors.report(1, "Duplicate module named '%s'" % st.id) manager.errors.raise_error() graph[st.id] = st - # Collect dependencies. - while True: - new = {} # type: Graph - for st in graph.values(): - for dep in st.roots + st.dependencies: - if dep not in graph and dep not in new: - imported_from = None + new.append(st) + # Collect dependencies. We go breadth-first. + while new: + st = new.popleft() + for dep in st.roots + st.dependencies: + if dep not in graph: + try: + if dep in st.roots: + # Roots don't have import context. 
+ newst = State(dep, None, None, manager) + else: + newst = State(dep, None, None, manager, st, st.dep_line_map.get(dep, 1)) + except ModuleNotFound: if dep in st.dependencies: - imported_from = (st, st.dep_line_map.get(dep, 1)) - assert dep, (st.roots, st.dependencies, st.id) - try: - depst = State(dep, None, None, manager, imported_from=imported_from) - except ModuleNotFound: - if dep in st.dependencies: - st.dependencies.remove(dep) - continue - assert depst.id not in new, "TODO: This is bad %s" % depst.id - new[depst.id] = depst - if not new: - break - graph.update(new) + st.dependencies.remove(dep) + else: + assert newst.id not in graph, newst.id + graph[newst.id] = newst + new.append(newst) return graph From bde24ac85a8f5a10bad0ed3ae09ddc98f3b7e2ba Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Sat, 12 Mar 2016 08:39:56 -0800 Subject: [PATCH 074/117] Fix errors about overreaching relative imports. --- mypy/depmgr.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/mypy/depmgr.py b/mypy/depmgr.py index 348a861ea719..518d892c5fd1 100644 --- a/mypy/depmgr.py +++ b/mypy/depmgr.py @@ -325,7 +325,7 @@ def parse_file(self) -> None: except UnicodeDecodeError as decodeerr: raise CompileError([ "mypy: can't decode file '{}': {}".format(self.path, str(decodeerr))]) - self.tree = parse_file(self.id, self.path, source, manager) + self.tree = parse_file(self.id, self.xpath, source, manager) modules[self.id] = self.tree @@ -359,8 +359,12 @@ def parse_file(self) -> None: for id, line in manager.all_imported_modules_in_file(self.tree): # Omit missing modules, as otherwise we could not type-check # programs with missing modules. - if id == self.id or id in manager.missing_modules or not id: + if id == self.id or id in manager.missing_modules: continue + if id == '': + # Must be from a relative import. + manager.errors.set_file(self.xpath) + manager.errors.report(line, "No parent module -- cannot perform relative import", blocker=True) if id not in dep_line_map: dependencies.append(id) dep_line_map[id] = line From 84769433e1ac34ed8eeb1a116329713dd1130c2b Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Sat, 12 Mar 2016 15:39:03 -0800 Subject: [PATCH 075/117] Patch submodule into its parent in a later stage so it's more likely to work. --- mypy/depmgr.py | 25 +++++++++++++++---------- mypy/test/data/semanal-errors.test | 2 +- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/mypy/depmgr.py b/mypy/depmgr.py index 518d892c5fd1..4c77448d3870 100644 --- a/mypy/depmgr.py +++ b/mypy/depmgr.py @@ -329,16 +329,6 @@ def parse_file(self) -> None: modules[self.id] = self.tree - if self.tree and '.' in self.id: - # Include module in the symbol table of the enclosing package. - # TODO: This is weirdly optional; why is it needed? - parent, child = self.id.rsplit('.', 1) - if parent in modules: - manager.trace("Added %s.%s" % (parent, child)) - modules[parent].names[child] = SymbolTableNode(MODULE_REF, self.tree, parent) - else: - manager.log("Hm... couldn't add %s.%s" % (parent, child)) - # Do the first pass of semantic analysis: add top-level # definitions in the file to the symbol table. We must do # this before processing imports, since this may mark some @@ -385,6 +375,18 @@ def parse_file(self) -> None: self.dep_line_map = dep_line_map self.check_blockers() + def patch_parent(self) -> None: + # Include module in the symbol table of the enclosing package. + assert '.' 
in self.id + manager = self.manager + modules = manager.modules + parent, child = self.id.rsplit('.', 1) + if parent in modules: + manager.trace("Added %s.%s" % (parent, child)) + modules[parent].names[child] = SymbolTableNode(MODULE_REF, self.tree, parent) + else: + manager.log("Hm... couldn't add %s.%s" % (parent, child)) + def semantic_analysis(self) -> None: with self.wrap_context(): self.manager.semantic_analyzer.visit_file(self.tree, self.xpath) @@ -534,6 +536,9 @@ def process_stale_scc(graph: Graph, scc: List[str]) -> None: # We may already have parsed the module, or not. # If the former, parse_file() is a no-op. graph[id].parse_file() + # But we still need to patch a submodule into its parent package. + if '.' in id: + graph[id].patch_parent() for id in scc: graph[id].semantic_analysis() for id in scc: diff --git a/mypy/test/data/semanal-errors.test b/mypy/test/data/semanal-errors.test index 1ffd991d5af2..54cf6ec327c7 100644 --- a/mypy/test/data/semanal-errors.test +++ b/mypy/test/data/semanal-errors.test @@ -295,7 +295,7 @@ from . import m [out] main:1: error: No parent module -- cannot perform relative import -[case testRelativeImportAtTopLevelModule] +[case testRelativeImportAtTopLevelModule2] from .. import m [out] main:1: error: No parent module -- cannot perform relative import From 5dfaafb471a75a34eb5a31355f811bd5922469f7 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Sat, 12 Mar 2016 16:41:56 -0800 Subject: [PATCH 076/117] Support --[infer]stats and file reports. --- mypy/build.py | 6 +++--- mypy/depmgr.py | 26 ++++++++++++++++++-------- mypy/main.py | 2 ++ 3 files changed, 23 insertions(+), 11 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index cbc16a5840ac..209cfea9bf15 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -209,9 +209,9 @@ def build(sources: List[BuildSource], implicit_any=implicit_any, reports=reports) - if flags.count(INCREMENTAL) >= 2 or os.getenv("NEWINCREMENTAL"): - from mypy.depmgr import dispatch - dispatch(sources, manager) + if INCREMENTAL in flags: + import mypy.depmgr # type: ignore + mypy.depmgr.dispatch(sources, manager) # type: ignore result = BuildResult(manager.modules, manager.type_checker.type_map) else: # Construct information that describes the initial files. __main__ is the diff --git a/mypy/depmgr.py b/mypy/depmgr.py index 4c77448d3870..8e2411499ce7 100644 --- a/mypy/depmgr.py +++ b/mypy/depmgr.py @@ -148,14 +148,17 @@ from typing import Any, Dict, List, Set, AbstractSet, Tuple, Iterable, Iterator, Optional, TypeVar from mypy.build import (BuildManager, BuildSource, CacheMeta, - INCREMENTAL, FAST_PARSER, SILENT_IMPORTS, TYPE_CHECK, - find_cache_meta, find_module, read_with_python_encoding, - write_cache) + TYPE_CHECK, + INCREMENTAL, FAST_PARSER, SILENT_IMPORTS, + DUMP_TYPE_STATS, DUMP_INFER_STATS, + find_module, read_with_python_encoding, + find_cache_meta, write_cache) from mypy.errors import CompileError from mypy.fixup import fixup_module_pass_one, fixup_module_pass_two from mypy.nodes import MypyFile, SymbolTableNode, MODULE_REF from mypy.parse import parse from mypy.semanal import FirstPass +from mypy.stats import dump_type_stats class ModuleNotFound(Exception): @@ -312,6 +315,7 @@ def parse_file(self) -> None: manager = self.manager modules = manager.modules + manager.log("Parsing %s" % self.xpath) with self.wrap_context(): source = self.source @@ -354,7 +358,8 @@ def parse_file(self) -> None: if id == '': # Must be from a relative import. 
manager.errors.set_file(self.xpath) - manager.errors.report(line, "No parent module -- cannot perform relative import", blocker=True) + manager.errors.report(line, "No parent module -- cannot perform relative import", + blocker=True) if id not in dep_line_map: dependencies.append(id) dep_line_map[id] = line @@ -394,14 +399,19 @@ def semantic_analysis(self) -> None: def semantic_analysis_pass_three(self) -> None: with self.wrap_context(): self.manager.semantic_analyzer_pass3.visit_file(self.tree, self.xpath) - # TODO: DUMP_TYPE_STATS + if DUMP_TYPE_STATS in self.manager.flags: + dump_type_stats(self.tree, self.xpath) def type_check(self) -> None: - if self.manager.target < TYPE_CHECK: + manager = self.manager + if manager.target < TYPE_CHECK: return with self.wrap_context(): - self.manager.type_checker.visit_file(self.tree, self.xpath) - # TODO: DUMP_INFER_STATS, manager.reports.file() + manager.type_checker.visit_file(self.tree, self.xpath) + type_map = manager.type_checker.type_map + if DUMP_INFER_STATS in manager.flags: + dump_type_stats(self.tree, self.xpath, inferred=True, typemap=type_map) + manager.reports.file(self.tree, type_map=type_map) def write_cache(self) -> None: if self.path and INCREMENTAL in self.manager.flags and not self.manager.errors.is_errors(): diff --git a/mypy/main.py b/mypy/main.py index 83df106b2575..03077aa2c29c 100644 --- a/mypy/main.py +++ b/mypy/main.py @@ -110,6 +110,8 @@ def process_options(args: List[str]) -> Tuple[List[BuildSource], Options]: if args[0] in ('--verbose', '-v'): options.build_flags.append(build.VERBOSE) args = args[1:] + elif args[0] == '--py1': + fail("Are you kidding me?") elif args[0] == '--py2': # Use Python 2 mode. options.pyversion = defaults.PYTHON2_VERSION From f11dd1d7a05974640403e7f78e717e82007e8adf Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Sat, 12 Mar 2016 17:11:42 -0800 Subject: [PATCH 077/117] Call patch_parent() for fresh as well as stale modules. Minor cleanups. --- mypy/depmgr.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/mypy/depmgr.py b/mypy/depmgr.py index 8e2411499ce7..282aac49927a 100644 --- a/mypy/depmgr.py +++ b/mypy/depmgr.py @@ -143,9 +143,8 @@ import collections import contextlib import json -import os -from typing import Any, Dict, List, Set, AbstractSet, Tuple, Iterable, Iterator, Optional, TypeVar +from typing import Dict, List, Set, AbstractSet, Tuple, Iterable, Iterator, Optional from mypy.build import (BuildManager, BuildSource, CacheMeta, TYPE_CHECK, @@ -228,7 +227,6 @@ def __init__(self, # Could not find a module. Typically the reason is a # misspelled module name, missing stub, module not in # search path or the module has not been installed. - # TODO: Copy the check for id == '' from build.py? if self.caller_state: if not (SILENT_IMPORTS in manager.flags or (caller_state.tree is not None and @@ -261,7 +259,6 @@ def __init__(self, def add_roots(self) -> None: # All parent packages are new roots. - # TODO: Use build.super_packages()? roots = [] parent = self.id while '.' in parent: @@ -382,7 +379,8 @@ def parse_file(self) -> None: def patch_parent(self) -> None: # Include module in the symbol table of the enclosing package. - assert '.' in self.id + if '.' 
not in self.id: + return manager = self.manager modules = manager.modules parent, child = self.id.rsplit('.', 1) @@ -532,6 +530,8 @@ def process_fresh_scc(graph: Graph, scc: List[str]) -> None: """Process the modules in one SCC from their cached data.""" for id in scc: graph[id].load_tree() + for id in scc: + graph[id].patch_parent() for id in scc: graph[id].fix_cross_refs() for id in scc: @@ -546,9 +546,8 @@ def process_stale_scc(graph: Graph, scc: List[str]) -> None: # We may already have parsed the module, or not. # If the former, parse_file() is a no-op. graph[id].parse_file() - # But we still need to patch a submodule into its parent package. - if '.' in id: - graph[id].patch_parent() + for id in scc: + graph[id].patch_parent() for id in scc: graph[id].semantic_analysis() for id in scc: From 7073e25f6e7d5c0d9678b689a38b349bdf0a272a Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Sat, 12 Mar 2016 18:16:40 -0800 Subject: [PATCH 078/117] Killed the old state machine. Yeah! --- mypy/build.py | 890 ++------------------------------------------------ 1 file changed, 22 insertions(+), 868 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index 209cfea9bf15..98321b7afe84 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -14,24 +14,19 @@ import json import os import os.path -import shlex -import subprocess import sys import time -import re from os.path import dirname, basename -from typing import Any, Dict, List, Tuple, Iterable, cast, Set, Union, Optional, NamedTuple +from typing import Dict, List, Tuple, Iterable, Union, Optional, NamedTuple from mypy.types import Type from mypy.nodes import MypyFile, Node, Import, ImportFrom, ImportAll -from mypy.nodes import SymbolTableNode, MODULE_REF -from mypy.semanal import SemanticAnalyzer, FirstPass, ThirdPass +from mypy.nodes import MODULE_REF +from mypy.semanal import SemanticAnalyzer, ThirdPass from mypy.checker import TypeChecker -from mypy.errors import Errors, CompileError +from mypy.errors import Errors from mypy import fixup -from mypy import parse -from mypy import stats from mypy.report import Reports from mypy import defaults from mypy import moduleinfo @@ -61,38 +56,8 @@ DISALLOW_UNTYPED_CALLS = 'disallow-untyped-calls' INCREMENTAL = 'incremental' # Incremental mode: use the cache -# State ids. These describe the states a source file / module can be in a -# build. - -# We aren't processing this source file yet (no associated state object). -UNSEEN_STATE = 0 -# We're hopeful that we can load this from the cache. -PROBABLY_CACHED_STATE = 1 -# We've loaded the module from cache. -CACHE_LOADED_STATE = 2 -# We've patched up cross-references. -CACHE_PATCHED_STATE = 3 -# We've calculated MROs. -CACHE_WITH_MRO_STATE = 4 -# The source file has a state object, but we haven't done anything with it yet. -UNPROCESSED_STATE = 11 -# We've parsed the source file. -PARSED_STATE = 12 -# We've done the first two passes of semantic analysis. -PARTIAL_SEMANTIC_ANALYSIS_STATE = 13 -# We've semantically analyzed the source file. -SEMANTICALLY_ANALYSED_STATE = 14 -# We've type checked the source file (and all its dependencies). -TYPE_CHECKED_STATE = 19 - PYTHON_EXTENSIONS = ['.pyi', '.py'] -FINAL_STATE = TYPE_CHECKED_STATE - - -def earlier_state(s: int, t: int) -> bool: - return s < t - class BuildResult: """The result of a successful build. 
@@ -115,15 +80,6 @@ def __init__(self, path: Optional[str], module: Optional[str], self.module = module or '__main__' self.text = text - def load(self, lib_path, pyversion: Tuple[int, int]) -> str: - """Load the module if needed. This also has the side effect - of calculating the effective path for modules.""" - if self.text is not None: - return self.text - - self.path = self.path or lookup_program(self.module, lib_path) - return read_program(self.path, pyversion) - @property def effective_path(self) -> str: """Return the effective path (ie, if its from in memory)""" @@ -194,12 +150,11 @@ def build(sources: List[BuildSource], if alt_lib_path: lib_path.insert(0, alt_lib_path) - # TODO Reports is global to a build manager but only supports a single "main file" - # Fix this. + # TODO: Reports is global to a build manager but only supports a + # single "main file" Fix this. reports = Reports(sources[0].effective_path, data_dir, report_dirs) - # Construct a build manager object that performs all the stages of the - # build in the correct order. + # Construct a build manager object to hold state during the build. # # Ignore current directory prefix in error messages. manager = BuildManager(data_dir, lib_path, target, @@ -209,34 +164,15 @@ def build(sources: List[BuildSource], implicit_any=implicit_any, reports=reports) - if INCREMENTAL in flags: - import mypy.depmgr # type: ignore - mypy.depmgr.dispatch(sources, manager) # type: ignore - result = BuildResult(manager.modules, manager.type_checker.type_map) - else: - # Construct information that describes the initial files. __main__ is the - # implicit module id and the import context is empty initially ([]). - initial_states = [] # type: List[UnprocessedBase] - for source in sources: - initial_state = None # type: Optional[UnprocessedBase] - if source.module != '__main__' and source.path is not None: - initial_state = manager.maybe_make_cached_state(source.module, source.path) - # TODO: else if using '-m x' try the cache too - if initial_state is None: - content = source.load(lib_path, pyversion) - info = StateInfo(source.effective_path, source.module, [], manager) - initial_state = UnprocessedFile(info, content) - initial_states.append(initial_state) - - # Perform the build by sending the files as new file (UnprocessedFile is the - # initial state of all files) to the manager. The manager will process the - # file and all dependant modules recursively. - result = manager.process(initial_states) - - reports.finish() - manager.log("Build finished with %d modules and %d types" % - (len(result.files), len(result.types))) - return result + from mypy.depmgr import dispatch + try: + dispatch(sources, manager) + return BuildResult(manager.modules, manager.type_checker.type_map) + finally: + manager.log("Build finished with %d modules and %d types" % + (len(manager.modules), len(manager.type_checker.type_map))) + # Finish the HTML or XML reports even if CompileError was raised. 
+ reports.finish() def default_data_dir(bin_dir: str) -> str: @@ -330,28 +266,6 @@ def default_lib_path(data_dir: str, pyversion: Tuple[int, int], return path -def lookup_program(module: str, lib_path: List[str]) -> str: - # Modules are .py or .pyi - path = find_module(module, lib_path) - if path: - return path - else: - raise CompileError([ - "mypy: can't find module '{}'".format(module)]) - - -def read_program(path: str, pyversion: Tuple[int, int]) -> str: - try: - text = read_with_python_encoding(path, pyversion) - except IOError as ioerr: - raise CompileError([ - "mypy: can't read file '{}': {}".format(path, ioerr.strerror)]) - except UnicodeDecodeError as decodeerr: - raise CompileError([ - "mypy: can't decode file '{}': {}".format(path, str(decodeerr))]) - return text - - CacheMeta = NamedTuple('CacheMeta', [('id', str), ('path', str), @@ -364,17 +278,17 @@ def read_program(path: str, pyversion: Tuple[int, int]) -> str: class BuildManager: - """This is the central class for building a mypy program. + """This class holds shared state for building a mypy program. - It coordinates parsing, import processing, semantic analysis and - type checking. It manages state objects that actually perform the - build steps. + It is used to coordinate parsing, import processing, semantic + analysis and type checking. The actual build steps are carried + out by dispatch(). Attributes: data_dir: Mypy data directory (contains stubs) target: Build target; selects which passes to perform lib_path: Library path for looking up modules - modules: Mapping of module ID to MypyFile + modules: Mapping of module ID to MypyFile (shared by the passes) semantic_analyzer: Semantic analyzer, pass 2 semantic_analyzer_pass3: @@ -383,18 +297,7 @@ class BuildManager: errors: Used for reporting all errors pyversion: Python version (major, minor) flags: Build options - states: States of all individual files that are being - processed. Each file in a build is always represented - by a single state object (after it has been encountered - for the first time). This is the only place where - states are stored. - module_files: Map from module name to source file path. There is a - 1:1 mapping between modules and source files. - module_deps: Cache for module dependencies (direct or indirect). - Item (m, n) indicates whether m depends on n (directly - or indirectly). missing_modules: Set of modules that could not be imported encountered so far - loading_cache: Cache for type-checked files """ def __init__(self, data_dir: str, @@ -425,150 +328,7 @@ def __init__(self, data_dir: str, self.modules, self.pyversion, DISALLOW_UNTYPED_CALLS in self.flags) - self.states = [] # type: List[State] - self.module_files = {} # type: Dict[str, str] - self.module_deps = {} # type: Dict[Tuple[str, str], bool] self.missing_modules = set() # type: Set[str] - self.loading_cache = {} # type: Dict[str, Optional[CacheMeta]] - - def process(self, initial_states: List['UnprocessedBase']) -> BuildResult: - """Perform a build. - - The argument is a state that represents the main program - file. This method should only be called once per a build - manager object. The return values are identical to the return - values of the build function. 
- """ - self.states += initial_states - for initial_state in initial_states: - self.module_files[initial_state.id] = initial_state.path - for initial_state in initial_states: - initial_state.load_dependencies() - - # Process states in a loop until all files (states) have been - # semantically analyzed or type checked (depending on target). - # - # We type check all files before the rest of the passes so that we can - # report errors and fail as quickly as possible. - while True: - # Find the next state that has all its dependencies met. - next = self.next_available_state() - if not next: - self.trace('done') - break - - if self.flags.count(VERBOSE) >= 3: - self.trace('STATES OF THE WORLD') - for s in self.states: - self.trace(' id=%-15s ready=%-5s deps=%d (%2d) %s' % - (s.id, s.is_ready(), s.num_incomplete_deps(), s.state(), s.dependencies)) - self.trace('') - - # Potentially output some debug information. - self.trace('next {} ({})'.format(next.id, next.state())) - - # Set the import context for reporting error messages correctly. - self.errors.set_import_context(next.import_context) - # Process the state. The process method is responsible for adding a - # new state object representing the new state of the file. - next.process() - - # Raise exception if the build failed. The build can fail for - # various reasons, such as parse error, semantic analysis error, - # etc. - if self.errors.is_blockers(): - self.errors.raise_error() - - # If there were no errors, all files should have been fully processed. - for s in self.states: - if s.state() != FINAL_STATE: - import pdb # type: ignore - pdb.set_trace() - assert s.state() == FINAL_STATE, ( - '{} still unprocessed in state {}'.format(s.path, s.state())) - - if self.errors.is_errors(): - self.errors.raise_error() - - return BuildResult(self.modules, self.type_checker.type_map) - - def next_available_state(self) -> 'State': - """Find a ready state (one that has all its dependencies met).""" - i = len(self.states) - 1 - while i >= 0: - if self.states[i].is_ready(): - num_incomplete = self.states[i].num_incomplete_deps() - if num_incomplete == 0: - # This is perfect; no need to look for the best match. - return self.states[i] - i -= 1 - return None - - def has_module(self, name: str) -> bool: - """Have we seen a module yet?""" - return name in self.module_files - - def file_state(self, path: str) -> int: - """Return the state of a source file. - - In particular, return UNSEEN_STATE if the file has no associated - state. - - This function does not consider any dependencies. - """ - for s in self.states: - if s.path == path: - return s.state() - return UNSEEN_STATE - - def module_state(self, name: str) -> int: - """Return the state of a module. - - In particular, return UNSEEN_STATE if the file has no associated - state. - - This considers also module dependencies. - """ - if not self.has_module(name): - return UNSEEN_STATE - state = FINAL_STATE - fs = self.file_state(self.module_files[name]) - if earlier_state(fs, state): - state = fs - return state - - def is_dep(self, m1: str, m2: str, done: Set[str] = None) -> bool: - """Does m1 import m2 directly or indirectly?""" - # Have we computed this previously? - dep = self.module_deps.get((m1, m2)) - if dep is not None: - return dep - - if not done: - done = set([m1]) - - # m1 depends on m2 iff one of the deps of m1 depends on m2. - st = self.lookup_state(m1) - for m in st.dependencies: - if m in done: - continue - done.add(m) - # Cache this dependency. 
- self.module_deps[m1, m] = True - # Search recursively. - if m == m2 or self.is_dep(m, m2, done): - # Yes! Mark it in the cache. - self.module_deps[m1, m2] = True - return True - # No dependency. Mark it in the cache. - self.module_deps[m1, m2] = False - return False - - def lookup_state(self, module: str) -> 'State': - for state in self.states: - if state.id == module: - return state - raise RuntimeError('%s not found' % module) def all_imported_modules_in_file(self, file: MypyFile) -> List[Tuple[str, int]]: @@ -629,15 +389,6 @@ def module_not_found(self, path: str, line: int, id: str) -> None: self.errors.report(line, "(Perhaps setting MYPYPATH would help)", severity='note', only_once=True) - def maybe_make_cached_state(self, id: str, path: str) -> Optional['UnprocessedBase']: - if INCREMENTAL not in self.flags: - return None - m = find_cache_meta(id, path, self) - if m is None: - return None - info = StateInfo(path, id, self.errors.import_context(), self) - return ProbablyCachedFile(info, m) - def log(self, *message: str) -> None: if VERBOSE in self.flags: print('%.3f:LOG: ' % (time.time() - self.start_time), *message, file=sys.stderr) @@ -675,574 +426,7 @@ def remove_cwd_prefix_from_path(p: str) -> str: return p -# TODO: Use a NamedTuple? -class StateInfo: - """Description of a source file that is being built.""" - - def __init__(self, path: str, id: str, - import_context: List[Tuple[str, int]], - manager: BuildManager) -> None: - """Initialize state information. - - Arguments: - path: Path to the file - id: Module id, such as 'os.path' or '__main__' (for the main - program file) - import_context: - The import trail that caused this module to be - imported (path, line) tuples - manager: The manager that manages this build - """ - self.path = path - self.id = id - self.import_context = import_context - self.manager = manager - - -class State: - """Abstract base class for build states. - - There is always at most one state per source file. - """ - - # The StateInfo attributes are duplicated here for convenience. - # TODO: Why not just inherit from StateInfo? - path = '' - id = '' - import_context = None # type: List[Tuple[str, int]] - manager = None # type: BuildManager - # Modules that this file directly depends on (in no particular order). - dependencies = None # type: List[str] - - def __init__(self, info: StateInfo) -> None: - self.path = info.path - self.id = info.id - self.import_context = info.import_context - self.manager = info.manager - self.dependencies = [] - - def info(self) -> StateInfo: - return StateInfo(self.path, self.id, self.import_context, self.manager) - - def process(self) -> None: - raise RuntimeError('Not implemented') - - def is_ready(self) -> bool: - """Return True if all dependencies are at least in the same state - as this object (but not in the initial state). - """ - for module in self.dependencies: - state = self.manager.module_state(module) - if earlier_state(state, - self.state()) or state == UNPROCESSED_STATE: - return False - return True - - def num_incomplete_deps(self) -> int: - """Return the number of dependencies that are ready but incomplete.""" - return 0 # Does not matter in this state - - def state(self) -> int: - raise RuntimeError('Not implemented') - - def switch_state(self, state_object: 'State') -> None: - """Called by state objects to replace the state of the file. - - Also notify the manager. - """ - # TODO: Make this a method on the manager? 
- for i in range(len(self.manager.states)): - if self.manager.states[i].path == state_object.path: - self.manager.states[i] = state_object - self.manager.trace('switch {} ({})'.format(state_object.id, - state_object.state())) - return - raise RuntimeError('State for {} not found'.format(state_object.path)) - - def errors(self) -> Errors: - return self.manager.errors - - def semantic_analyzer(self) -> SemanticAnalyzer: - return self.manager.semantic_analyzer - - def semantic_analyzer_pass3(self) -> ThirdPass: - return self.manager.semantic_analyzer_pass3 - - def type_checker(self) -> TypeChecker: - return self.manager.type_checker - - def fail(self, path: str, line: int, msg: str, blocker: bool = True) -> None: - """Report an error in the build (e.g. if could not find a module).""" - self.errors().set_file(path) - self.errors().report(line, msg, blocker=blocker) - - -class UnprocessedBase(State): - def __init__(self, info: StateInfo) -> None: - super().__init__(info) - self.silent = SILENT_IMPORTS in self.manager.flags - - def load_dependencies(self) -> None: - # TODO: @abstractmethod - """Finish initialization by adding dependencies. - - This should call import_module() for each dependency and if - that succeeds append it to self.dependencies. - - This cannot be done in __init__() because the new state must - first be added to the manager, so that cyclic imports don't - cause an infinite regress. - """ - raise NotImplementedError - - def import_module(self, id: str) -> bool: - """Schedule a module to be processed. - - Add an unprocessed state object corresponding to the module to the - manager, or do nothing if the module already has a state object. - """ - if self.manager.has_module(id): - # Do nothing: already being compiled. - return True - - if id == 'builtins' and self.manager.pyversion[0] == 2: - # The __builtin__ module is called internally by mypy 'builtins' in Python 2 mode - # (similar to Python 3), but the stub file is __builtin__.pyi. The reason is that - # a lot of code hard codes 'builtins.x' and this it's easier to work it around like - # this. It also means that the implementation can mostly ignore the difference and - # just assume 'builtins' everywhere, which simplifies code. - file_id = '__builtin__' - else: - file_id = id - - path = find_module(file_id, self.manager.lib_path) - if path is None: - return False - - new_file = self.manager.maybe_make_cached_state(id, path) - if new_file is not None: - # TODO: Refactor so this manager update dance only occurs once? - self.manager.states.append(new_file) - self.manager.module_files[id] = path - new_file.load_dependencies() - return True - - path, text = read_module_source_from_file(file_id, self.manager.lib_path, - self.manager.pyversion, self.silent) - if text is not None: - info = StateInfo(path, id, self.errors().import_context(), - self.manager) - new_file = UnprocessedFile(info, text) - self.manager.states.append(new_file) - self.manager.module_files[id] = path - new_file.load_dependencies() - return True - else: - return False - - -class ProbablyCachedFile(UnprocessedBase): - def __init__(self, info: StateInfo, meta: CacheMeta) -> None: - super().__init__(info) - self.meta = meta - - def is_ready(self) -> bool: - """Return True if all dependencies are at least in the same state - as this object (but not in the initial state), *and* the transitive - closure of dependencies is too. - """ - my_state = self.state() - # To avoid quadratic behavior of repeatedly calling module_state(), - # just loop once over all states. 
Note that is_dep() is heavily cached. - for state_obj in self.manager.states: - if self.manager.is_dep(self.id, state_obj.id): - if earlier_state(state_obj.state(), my_state): - return False - return True - - def load_dependencies(self): - deps = self.meta.dependencies[:] - if self.id != 'builtins' and 'builtins' not in deps: - deps.append('builtins') # Even cached modules need this. - for dep_id in deps + super_packages(self.id): - if dep_id not in self.dependencies: - if self.import_module(dep_id): - self.dependencies.append(dep_id) - # TODO: else fail(...) - - def process(self) -> None: - """Transition to either UnprocessedFile or CacheLoadedFile. - - We've been waiting for results on the dependencies. If all - dependencies have now transitioned to eith CacheLoadedFile - (meaning their own dependencies were found good, except for - cycles) or from there to TypeCheckedFile (note that we check - that meta is not None) then we can in turn (try to) transition - to CacheLoadedFile. This could still fail due to a race - condition (if the data file's mtime). - - If any dependency was not loaded from cache or loading the - data failed, we fall back to reading the source, by switching - to an UnprocessedFile. - """ - ok = True - for dep_id in self.dependencies: - state_obj = self.manager.lookup_state(dep_id) - if isinstance(state_obj, - (ProbablyCachedFile, CacheLoadedFile, - CachePatchedFile, CacheWithMroFile)): - continue - if isinstance(state_obj, TypeCheckedFile) and state_obj.meta: - continue - self.manager.log('Abandoning cached data for {} ' - 'because {} changed ({})'.format(self.id, state_obj.id, - state_obj.__class__.__name__)) - ok = False - break - if ok: - # TODO: Errors - with open(self.meta.data_json) as f: - data = json.load(f) - if os.path.getmtime(self.meta.data_json) != self.meta.data_mtime: - self.manager.log('Abandoning cached data for {} ' - 'due to race condition'.format(self.id)) - ok = False - file = None # type: State - if ok: - file = CacheLoadedFile(self.info(), self.meta, data) - else: - # Didn't work -- construct an UnprocessedFile. - text = read_with_python_encoding(self.path, self.manager.pyversion) - # TODO: Errors - assert text is not None - file = UnprocessedFile(self.info(), text) - self.switch_state(file) - - def state(self) -> int: - return PROBABLY_CACHED_STATE - - -class CacheLoadedFile(State): - # TODO: Deserialize tree in caller? - def __init__(self, info: StateInfo, meta: CacheMeta, data: Any) -> None: - super().__init__(info) - self.meta = meta - self.dependencies.extend(meta.dependencies) - if self.id != 'builtins': - self.dependencies.append('builtins') # Even cached modules need this. - for dep_id in super_packages(self.id): - if dep_id not in self.dependencies: - self.dependencies.append(dep_id) - - # Deserialize the tree now. - self.tree = MypyFile.deserialize(data) - - # Store the parsed module in the shared module symbol table. - self.manager.modules[self.id] = self.tree - - def is_ready(self) -> bool: - """Return True if all dependencies are at least in the same state - as this object (but not in the initial state), *and* the transitive - closure of dependencies is too. - """ - my_state = self.state() - # To avoid quadratic behavior of repeatedly calling module_state(), - # just loop once over all states. Note that is_dep() is heavily cached. 
- for state_obj in self.manager.states: - if self.manager.is_dep(self.id, state_obj.id): - if earlier_state(state_obj.state(), my_state): - return False - return True - - def process(self) -> None: - """Patch up cross-references and Transition to CachePatchedFile.""" - self.manager.log('FIXING MODULE PASS ONE {}'.format(self.id)) - fixup.fixup_module_pass_one(self.tree, self.manager.modules) - file = CachePatchedFile(self.info(), self.tree, self.meta) - self.switch_state(file) - - def state(self) -> int: - return CACHE_LOADED_STATE - - -# TODO: Inherit from CacheLoadedFile? -class CachePatchedFile(State): - def __init__(self, info: StateInfo, tree: MypyFile, meta: CacheMeta) -> None: - super().__init__(info) - self.tree = tree - self.meta = meta - self.dependencies.extend(meta.dependencies) - if self.id != 'builtins': - self.dependencies.append('builtins') # Even cached modules need this. - for dep_id in super_packages(self.id): - if dep_id not in self.dependencies: - self.dependencies.append(dep_id) - - def is_ready(self) -> bool: - """Return True if all dependencies are at least in the same state - as this object (but not in the initial state), *and* the transitive - closure of dependencies is too. - """ - my_state = self.state() - # To avoid quadratic behavior of repeatedly calling module_state(), - # just loop once over all states. Note that is_dep() is heavily cached. - for state_obj in self.manager.states: - if self.manager.is_dep(self.id, state_obj.id): - if earlier_state(state_obj.state(), my_state): - return False - return True - - def process(self) -> None: - """Calculate all MROs and transition to CacheWithMroFile.""" - self.manager.log('FIXING MODULE PASS TWO {}'.format(self.id)) - fixup.fixup_module_pass_two(self.tree, self.manager.modules) - file = CacheWithMroFile(self.info(), self.tree, self.meta) - self.switch_state(file) - - def state(self) -> int: - return CACHE_PATCHED_STATE - - -class CacheWithMroFile(CachePatchedFile): - def process(self) -> None: - """Transition to TypeCheckedFile.""" - file = TypeCheckedFile(self.info(), self.tree, self.meta) - self.switch_state(file) - - def state(self) -> int: - return CACHE_WITH_MRO_STATE - - -class UnprocessedFile(UnprocessedBase): - def __init__(self, info: StateInfo, program_text: str) -> None: - super().__init__(info) - self.program_text = program_text - - def load_dependencies(self): - # Add surrounding (ancestor) package(s) as dependencies. - for p in super_packages(self.id): - if p in self.manager.missing_modules: - continue - if not self.import_module(p): - # Could not find a module. Typically the reason is a - # misspelled module name, missing stub, module not in - # search path or the module has not been installed. - if self.silent: - self.manager.missing_modules.add(p) - else: - self.manager.module_not_found(self.path, 1, p) - else: - self.dependencies.append(p) - - def process(self) -> None: - """Parse the file, store global names and advance to the next state.""" - if self.id in self.manager.modules: - self.fail(self.path, 1, "Duplicate module named '{}'".format(self.id)) - return - - tree = self.parse(self.program_text, self.path) - - # Store the parsed module in the shared module symbol table. - modules = self.manager.modules - modules[self.id] = tree - - if '.' in self.id: - # Include module in the symbol table of the enclosing package. 
- c = self.id.split('.') - p = '.'.join(c[:-1]) - if p in modules: - modules[p].names[c[-1]] = SymbolTableNode(MODULE_REF, tree, p) - - if self.id != 'builtins': - # The builtins module is imported implicitly in every program (it - # contains definitions of int, print etc.). - self.manager.trace('import builtins') - if not self.import_module('builtins'): - self.fail(self.path, 1, 'Could not find builtins') - - # Do the first pass of semantic analysis: add top-level definitions in - # the file to the symbol table. We must do this before processing imports, - # since this may mark some import statements as unreachable. - first = FirstPass(self.semantic_analyzer()) - first.analyze(tree, self.path, self.id) - - # Initialize module symbol table, which was populated by the semantic - # analyzer. - tree.names = self.semantic_analyzer().globals - - # Add all directly imported modules to be processed (however they are - # not processed yet, just waiting to be processed). - for id, line in self.manager.all_imported_modules_in_file(tree): - self.errors().push_import_context(self.path, line) - try: - res = self.import_module(id) - finally: - self.errors().pop_import_context() - if not res: - if id == '': - # Must be from a relative import. - self.fail(self.path, line, - "No parent module -- cannot perform relative import".format(id), - blocker=True) - else: - if (line not in tree.ignored_lines and - 'import' not in tree.weak_opts and - not self.silent): - self.manager.module_not_found(self.path, line, id) - self.manager.missing_modules.add(id) - - # Replace this state object with a parsed state in BuildManager. - self.switch_state(ParsedFile(self.info(), tree)) - - def parse(self, source_text: Union[str, bytes], fnam: str) -> MypyFile: - """Parse the source of a file with the given name. - - Raise CompileError if there is a parse error. - """ - num_errs = self.errors().num_messages() - tree = parse.parse(source_text, fnam, self.errors(), - pyversion=self.manager.pyversion, - custom_typing_module=self.manager.custom_typing_module, - implicit_any=self.manager.implicit_any, - fast_parser=FAST_PARSER in self.manager.flags) - tree._fullname = self.id - if self.errors().num_messages() != num_errs: - self.errors().raise_error() - return tree - - def state(self) -> int: - return UNPROCESSED_STATE - - -class ParsedFile(State): - tree = None # type: MypyFile - meta = None # type: Optional[CacheMeta] - - def __init__(self, info: StateInfo, tree: MypyFile, - meta: CacheMeta = None) -> None: - super().__init__(info) - self.tree = tree - self.meta = meta - - if meta is not None: - imp = meta.dependencies - else: - # Build a list all directly imported moules (dependencies). - imp = [] - for id, line in self.manager.all_imported_modules_in_file(tree): - # Omit missing modules, as otherwise we could not type check - # programs with missing modules. - if id not in self.manager.missing_modules and id != self.id: - imp.append(id) - if self.id != 'builtins': - imp.append('builtins') - - if imp != []: - self.manager.trace('{} dependencies: {}'.format(info.id, imp)) - - # Record the dependencies. Note that the dependencies list also - # contains any superpackages and we must preserve them (e.g. os for - # os.path). 
XXX NOT ACTUALLY TRUE XXX - self.dependencies.extend(imp) - - def process(self) -> None: - """Semantically analyze file and advance to the next state.""" - self.semantic_analyzer().visit_file(self.tree, self.tree.path) - self.switch_state(PartiallySemanticallyAnalyzedFile(self.info(), - self.tree)) - - def num_incomplete_deps(self) -> int: - """Return the number of dependencies that are incomplete. - - Here complete means that their state is *later* than this module. - Cyclic dependencies are omitted to break cycles forcibly (and somewhat - arbitrarily). - """ - incomplete = 0 - for module in self.dependencies: - state = self.manager.module_state(module) - if (not earlier_state(self.state(), state) and - not self.manager.is_dep(module, self.id)): - incomplete += 1 - return incomplete - - def state(self) -> int: - return PARSED_STATE - - -class PartiallySemanticallyAnalyzedFile(ParsedFile): - def process(self) -> None: - """Perform final pass of semantic analysis and advance state.""" - self.semantic_analyzer_pass3().visit_file(self.tree, self.tree.path) - if DUMP_TYPE_STATS in self.manager.flags: - stats.dump_type_stats(self.tree, self.tree.path) - self.switch_state(SemanticallyAnalyzedFile(self.info(), self.tree)) - - def state(self) -> int: - return PARTIAL_SEMANTIC_ANALYSIS_STATE - - -class SemanticallyAnalyzedFile(ParsedFile): - def process(self) -> None: - """Type check file and advance to the next state.""" - if self.manager.target >= TYPE_CHECK: - self.type_checker().visit_file(self.tree, self.tree.path) - if DUMP_INFER_STATS in self.manager.flags: - stats.dump_type_stats(self.tree, self.tree.path, inferred=True, - typemap=self.type_checker().type_map) - self.manager.reports.file(self.tree, type_map=self.type_checker().type_map) - - # FIX remove from active state list to speed up processing - - file = TypeCheckedFile(self.info(), self.tree) - if INCREMENTAL in self.manager.flags: - dump_to_json(file, self.manager) - self.switch_state(file) - - def state(self) -> int: - return SEMANTICALLY_ANALYSED_STATE - - -class TypeCheckedFile(SemanticallyAnalyzedFile): - def process(self) -> None: - """Finished, so cannot process.""" - raise RuntimeError('Cannot process TypeCheckedFile') - - def is_ready(self) -> bool: - """Finished, so cannot ever become ready.""" - return False - - def state(self) -> int: - return TYPE_CHECKED_STATE - - -def read_module_source_from_file(id: str, - lib_path: Iterable[str], - pyversion: Tuple[int, int], - silent: bool) -> Tuple[Optional[str], Optional[str]]: - """Find and read the source file of a module. - - Return a pair (path, file contents). Return (None, None) if the module - could not be found or read. - - Args: - id: module name, a string of form 'foo' or 'foo.bar' - lib_path: library search path - silent: if set, don't import .py files (only .pyi files) - """ - path = find_module(id, lib_path) - if path is not None: - if silent and not path.endswith('.pyi'): - return None, None - try: - text = read_with_python_encoding(path, pyversion) - except IOError: - return None, None - return path, text - else: - return None, None - - +# TODO: Maybe move this into BuildManager? # Cache find_module: (id, lib_path) -> result. find_module_cache = {} # type: Dict[Tuple[str, Tuple[str, ...]], str] @@ -1346,23 +530,6 @@ def verify_module(id: str, path: str) -> bool: return True -def super_packages(id: str) -> List[str]: - """Return the surrounding packages of a module, e.g. 
['os'] for os.path.""" - c = id.split('.') - res = [] # type: List[str] - for i in range(1, len(c)): - res.append('.'.join(c[:i])) - return res - - -def make_parent_dirs(path: str) -> None: - parent = os.path.dirname(path) - try: - os.makedirs(parent) - except OSError: - pass - - def read_with_python_encoding(path: str, pyversion: Tuple[int, int]) -> str: """Read the Python file with while obeying PEP-263 encoding detection""" source_bytearray = bytearray() @@ -1388,9 +555,6 @@ def read_with_python_encoding(path: str, pyversion: Tuple[int, int]) -> str: return source_bytearray.decode(encoding) -# Experimental incremental loading -# TODO: Flags (e.g. py2, implicit-any) - MYPY_CACHE = '.mypy_cache' @@ -1445,16 +609,6 @@ def random_string(): return binascii.hexlify(os.urandom(8)).decode('ascii') -def dump_to_json(file: TypeCheckedFile, manager: BuildManager) -> None: - id = file.id - if id == '__main__': - return - path = file.path - if path == '': - return - write_cache(id, path, file.tree, file.dependencies, manager) - - def write_cache(id: str, path: str, tree: MypyFile, dependencies: List[str], manager: BuildManager) -> None: path = os.path.abspath(path) From 0b4ef76db49c0cb3719ef7d3c6688bcfc2c81b3a Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Sat, 12 Mar 2016 18:26:37 -0800 Subject: [PATCH 079/117] Merged depmgr into build, to avoid a nasty import cycle. --- mypy/build.py | 635 +++++++++++++++++++++++++++++++++++++++++++++++- mypy/depmgr.py | 637 ------------------------------------------------- 2 files changed, 629 insertions(+), 643 deletions(-) delete mode 100644 mypy/depmgr.py diff --git a/mypy/build.py b/mypy/build.py index 98321b7afe84..ad756671d3b5 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -11,6 +11,8 @@ # TODO: More consistent terminology, e.g. path/fnam, module/id, state/file import binascii +import collections +import contextlib import json import os import os.path @@ -18,19 +20,23 @@ import time from os.path import dirname, basename -from typing import Dict, List, Tuple, Iterable, Union, Optional, NamedTuple +from typing import (AbstractSet, Dict, Iterable, Iterator, List, + NamedTuple, Optional, Set, Tuple, Union) from mypy.types import Type -from mypy.nodes import MypyFile, Node, Import, ImportFrom, ImportAll -from mypy.nodes import MODULE_REF -from mypy.semanal import SemanticAnalyzer, ThirdPass +from mypy.nodes import (MypyFile, Node, Import, ImportFrom, ImportAll, + SymbolTableNode, MODULE_REF) +from mypy.semanal import FirstPass, SemanticAnalyzer, ThirdPass from mypy.checker import TypeChecker -from mypy.errors import Errors +from mypy.errors import Errors, CompileError from mypy import fixup from mypy.report import Reports from mypy import defaults from mypy import moduleinfo from mypy import util +from mypy.fixup import fixup_module_pass_one, fixup_module_pass_two +from mypy.parse import parse +from mypy.stats import dump_type_stats # We need to know the location of this file to load data, but @@ -164,7 +170,6 @@ def build(sources: List[BuildSource], implicit_any=implicit_any, reports=reports) - from mypy.depmgr import dispatch try: dispatch(sources, manager) return BuildResult(manager.modules, manager.type_checker.type_map) @@ -693,3 +698,621 @@ def write_cache(id: str, path: str, tree: MypyFile, dependencies: List[str], import pdb # type: ignore pdb.set_trace() print() + + +"""Dependency manager. + +Design +====== + +Ideally +------- + +A. Collapse cycles (each SCC -- strongly connected component -- + becomes one "supernode"). + +B. 
Topologically sort nodes based on dependencies. + +C. Process from leaves towards roots. + +Wrinkles +-------- + +a. Need to parse source modules to determine dependencies. + +b. Processing order for modules within an SCC. + +c. Must order mtimes of files to decide whether to re-process; depends + on clock never resetting. + +d. from P import M; checks filesystem whether module P.M exists in + filesystem. + +e. Race conditions, where somebody modifies a file while we're + processing. I propose not to modify the algorithm to handle this, + but to detect when this could lead to inconsistencies. (For + example, when we decide on the dependencies based on cache + metadata, and then we decide to re-parse a file because of a stale + dependency, if the re-parsing leads to a different list of + dependencies we should warn the user or start over.) + +Steps +----- + +1. For each explicitly given module find the source file location. + +2. For each such module load and check the cache metadata, and decide + whether it's valid. + +3. Now recursively (or iteratively) find dependencies and add those to + the graph: + + - for cached nodes use the list of dependencies from the cache + metadata (this will be valid even if we later end up re-parsing + the same source); + + - for uncached nodes parse the file and process all imports found, + taking care of (a) above. + +Step 3 should also address (d) above. + +Once step 3 terminates we have the entire dependency graph, and for +each module we've either loaded the cache metadata or parsed the +source code. (However, we may still need to parse those modules for +which we have cache metadata but that depend, directly or indirectly, +on at least one module for which the cache metadata is stale.) + +Now we can execute steps A-C from the first section. Finding SCCs for +step A shouldn't be hard; there's a recipe here: +http://code.activestate.com/recipes/578507/. There's also a plethora +of topsort recipes, e.g. http://code.activestate.com/recipes/577413/. + +For single nodes, processing is simple. If the node was cached, we +deserialize the cache data and fix up cross-references. Otherwise, we +do semantic analysis followed by type checking. We also handle (c) +above; if a module has valid cache data *but* any of its +dependendencies was processed from source, then the module should be +processed from source. + +A relatively simple optimization (outside SCCs) we might do in the +future is as follows: if a node's cache data is valid, but one or more +of its dependencies are out of date so we have to re-parse the node +from source, once we have fully type-checked the node, we can decide +whether its symbol table actually changed compared to the cache data +(by reading the cache data and comparing it to the data we would be +writing). If there is no change we can declare the node up to date, +and any node that depends (and for which we have cached data, and +whose other dependencies are up to date) on it won't need to be +re-parsed from source. + +Import cycles +------------- + +Finally we have to decide how to handle (c), import cycles. Here +we'll need a modified version of the original state machine +(build.py), but we only need to do this per SCC, and we won't have to +deal with changes to the list of nodes while we're processing it. + +If all nodes in the SCC have valid cache metadata and all dependencies +outside the SCC are still valid, we can proceed as follows: + + 1. Load cache data for all nodes in the SCC. + + 2. 
Fix up cross-references for all nodes in the SCC. + +Otherwise, the simplest (but potentially slow) way to proceed is to +invalidate all cache data in the SCC and re-parse all nodes in the SCC +from source. We can do this as follows: + + 1. Parse source for all nodes in the SCC. + + 2. Semantic analysis for all nodes in the SCC. + + 3. Type check all nodes in the SCC. + +(If there are more passes the process is the same -- each pass should +be done for all nodes before starting the next pass for any nodes in +the SCC.) + +We could process the nodes in the SCC in any order. For sentimental +reasons, I've decided to process them in the reverse order in which we +encountered them when originally constructing the graph. That's how +the old build.py deals with cycles, and at least this reproduces the +previous implementation more accurately. + +Can we do better than re-parsing all nodes in the SCC when any of its +dependencies are out of date? It's doubtful. The optimization +mentioned at the end of the previous section would require re-parsing +and type-checking a node and then comparing its symbol table to the +cached data; but because the node is part of a cycle we can't +technically type-check it until the semantic analysis of all other +nodes in the cycle has completed. (This is an important issue because +we have a cycle of over 500 modules in the server repo. But I'd like +to deal with it later.) + +Additional wrinkles +------------------- + +During implementation more wrinkles were found. + +- When a submodule of a package (e.g. x.y) is encountered, the parent + package (e.g. x) must also be loaded, but it is not strictly a + dependency. See State.add_roots() below. +""" + + +class ModuleNotFound(Exception): + """Control flow exception to signal that a module was not found.""" + + +class State: + """The state for a module. + + It's a package if path ends in __init__.py[i]. + + The source is only used for the -c command line option; in that + case path is None. Otherwise source is None and path isn't. 
+ """ + + manager = None # type: BuildManager + order_counter = 0 # Class variable + order = None # type: int # Order in which modules were encountered + id = None # type: str # Fully qualified module name + path = None # type: Optional[str] # Path to module source + xpath = None # type: str # Path or '' + source = None # type: Optional[str] # Module source code + meta = None # type: Optional[CacheMeta] + data = None # type: Optional[str] + tree = None # type: Optional[MypyFile] + dependencies = None # type: List[str] + dep_line_map = None # tyoe: Dict[str, int] # Line number where imported + roots = None # type: Optional[List[str]] + import_context = None # type: List[Tuple[str, int]] + caller_state = None # type: Optional[State] + caller_line = 0 + + def __init__(self, + id: Optional[str], + path: Optional[str], + source: Optional[str], + manager: BuildManager, + caller_state: 'State' = None, + caller_line: int = 0, + ) -> None: + assert id or path or source is not None, "Neither id, path nor source given" + self.manager = manager + State.order_counter += 1 + self.order = State.order_counter + self.caller_state = caller_state + self.caller_line = caller_line + if caller_state: + self.import_context = caller_state.import_context[:] + self.import_context.append((caller_state.xpath, caller_line)) + else: + self.import_context = [] + self.id = id or '__main__' + if not path and source is None: + file_id = id + if id == 'builtins' and manager.pyversion[0] == 2: + # The __builtin__ module is called internally by mypy + # 'builtins' in Python 2 mode (similar to Python 3), + # but the stub file is __builtin__.pyi. The reason is + # that a lot of code hard-codes 'builtins.x' and it's + # easier to work it around like this. It also means + # that the implementation can mostly ignore the + # difference and just assume 'builtins' everywhere, + # which simplifies code. + file_id = '__builtin__' + path = find_module(file_id, manager.lib_path) + if not path: + # Could not find a module. Typically the reason is a + # misspelled module name, missing stub, module not in + # search path or the module has not been installed. + if self.caller_state: + if not (SILENT_IMPORTS in manager.flags or + (caller_state.tree is not None and + (caller_line in caller_state.tree.ignored_lines or + 'import' in caller_state.tree.weak_opts))): + save_import_context = manager.errors.import_context() + manager.errors.set_import_context(caller_state.import_context) + manager.module_not_found(caller_state.xpath, caller_line, id) + manager.errors.set_import_context(save_import_context) + manager.missing_modules.add(id) + raise ModuleNotFound + else: + # If this is a root it's always fatal. + # TODO: This might hide non-fatal errors from + # roots processed earlier. + raise CompileError(["mypy: can't find module '%s'" % id]) + self.path = path + self.xpath = path or '' + self.source = source + if path and source is None and INCREMENTAL in manager.flags: + self.meta = find_cache_meta(self.id, self.path, manager) + # TODO: Get mtime if not cached. + self.add_roots() + if self.meta: + self.dependencies = self.meta.dependencies + self.dep_line_map = {} + else: + # Parse the file (and then some) to get the dependencies. + self.parse_file() + + def add_roots(self) -> None: + # All parent packages are new roots. + roots = [] + parent = self.id + while '.' 
in parent: + parent, _ = parent.rsplit('.', 1) + roots.append(parent) + self.roots = roots + + def is_fresh(self) -> bool: + """Return whether the cache data for this file is fresh.""" + return self.meta is not None + + def clear_fresh(self) -> None: + """Throw away the cache data for this file, marking it as stale.""" + self.meta = None + + def check_blockers(self) -> None: + """Raise CompileError if a blocking error is detected.""" + if self.manager.errors.is_blockers(): + self.manager.log("Bailing due to blocking errors") + self.manager.errors.raise_error() + + @contextlib.contextmanager + def wrap_context(self) -> Iterator[None]: + save_import_context = self.manager.errors.import_context() + self.manager.errors.set_import_context(self.import_context) + yield + self.manager.errors.set_import_context(save_import_context) + self.check_blockers() + + # Methods for processing cached modules. + + def load_tree(self) -> None: + with open(self.meta.data_json) as f: + data = json.load(f) + # TODO: Assert data file wasn't changed. + self.tree = MypyFile.deserialize(data) + self.manager.modules[self.id] = self.tree + + def fix_cross_refs(self) -> None: + fixup_module_pass_one(self.tree, self.manager.modules) + + def calculate_mros(self) -> None: + fixup_module_pass_two(self.tree, self.manager.modules) + + # Methods for processing modules from source code. + + def parse_file(self) -> None: + if self.tree is not None: + # The file was already parsed (in __init__()). + return + + manager = self.manager + modules = manager.modules + manager.log("Parsing %s" % self.xpath) + + with self.wrap_context(): + source = self.source + self.source = None # We won't need it again. + if self.path and source is None: + try: + source = read_with_python_encoding(self.path, manager.pyversion) + except IOError as ioerr: + raise CompileError([ + "mypy: can't read file '{}': {}".format(self.path, ioerr.strerror)]) + except UnicodeDecodeError as decodeerr: + raise CompileError([ + "mypy: can't decode file '{}': {}".format(self.path, str(decodeerr))]) + self.tree = parse_file(self.id, self.xpath, source, manager) + + modules[self.id] = self.tree + + # Do the first pass of semantic analysis: add top-level + # definitions in the file to the symbol table. We must do + # this before processing imports, since this may mark some + # import statements as unreachable. + first = FirstPass(manager.semantic_analyzer) + first.analyze(self.tree, self.xpath, self.id) + + # Initialize module symbol table, which was populated by the + # semantic analyzer. + # TODO: Why can't FirstPass .analyze() do this? + self.tree.names = manager.semantic_analyzer.globals + + # Compute (direct) dependencies. + # Add all direct imports (this is why we needed the first pass). + # Also keep track of each dependency's source line. + dependencies = [] + dep_line_map = {} # type: Dict[str, int] # id -> line + for id, line in manager.all_imported_modules_in_file(self.tree): + # Omit missing modules, as otherwise we could not type-check + # programs with missing modules. + if id == self.id or id in manager.missing_modules: + continue + if id == '': + # Must be from a relative import. + manager.errors.set_file(self.xpath) + manager.errors.report(line, "No parent module -- cannot perform relative import", + blocker=True) + if id not in dep_line_map: + dependencies.append(id) + dep_line_map[id] = line + # Every module implicitly depends on builtins. 
+ if self.id != 'builtins' and 'builtins' not in dependencies: + dependencies.append('builtins') + + # If self.dependencies is already set, it was read from the + # cache, but for some reason we're re-parsing the file. + # Double-check that the dependencies still match (otherwise + # the graph is out of date). + if self.dependencies is not None and dependencies != self.dependencies: + # TODO: Make this into a reasonable error message. + print("HELP!! Dependencies changed!") # Probably the file was edited. + print(" Cached:", self.dependencies) + print(" Source:", dependencies) + self.dependencies = dependencies + self.dep_line_map = dep_line_map + self.check_blockers() + + def patch_parent(self) -> None: + # Include module in the symbol table of the enclosing package. + if '.' not in self.id: + return + manager = self.manager + modules = manager.modules + parent, child = self.id.rsplit('.', 1) + if parent in modules: + manager.trace("Added %s.%s" % (parent, child)) + modules[parent].names[child] = SymbolTableNode(MODULE_REF, self.tree, parent) + else: + manager.log("Hm... couldn't add %s.%s" % (parent, child)) + + def semantic_analysis(self) -> None: + with self.wrap_context(): + self.manager.semantic_analyzer.visit_file(self.tree, self.xpath) + + def semantic_analysis_pass_three(self) -> None: + with self.wrap_context(): + self.manager.semantic_analyzer_pass3.visit_file(self.tree, self.xpath) + if DUMP_TYPE_STATS in self.manager.flags: + dump_type_stats(self.tree, self.xpath) + + def type_check(self) -> None: + manager = self.manager + if manager.target < TYPE_CHECK: + return + with self.wrap_context(): + manager.type_checker.visit_file(self.tree, self.xpath) + type_map = manager.type_checker.type_map + if DUMP_INFER_STATS in manager.flags: + dump_type_stats(self.tree, self.xpath, inferred=True, typemap=type_map) + manager.reports.file(self.tree, type_map=type_map) + + def write_cache(self) -> None: + if self.path and INCREMENTAL in self.manager.flags and not self.manager.errors.is_errors(): + write_cache(self.id, self.path, self.tree, list(self.dependencies), self.manager) + + +# TODO: This would make a nice method on BuildManager. +def parse_file(id: str, path: str, source: str, manager: BuildManager) -> MypyFile: + """Parse the source of a file with the given name. + + Raise CompileError if there is a parse error. + """ + errors = manager.errors + num_errs = errors.num_messages() + tree = parse(source, path, errors, + pyversion=manager.pyversion, + custom_typing_module=manager.custom_typing_module, + implicit_any=manager.implicit_any, + fast_parser=FAST_PARSER in manager.flags) + tree._fullname = id + if errors.num_messages() != num_errs: + manager.log("Bailing due to parse errors") + errors.raise_error() + return tree + + +Graph = Dict[str, State] + + +def dispatch(sources: List[BuildSource], manager: BuildManager) -> None: + manager.log("Using new dependency manager") + graph = load_graph(sources, manager) + manager.log("Loaded graph with %d nodes" % len(graph)) + process_graph(graph, manager) + if manager.errors.is_errors(): + manager.log("Found %d errors (before de-duping)" % manager.errors.num_messages()) + manager.errors.raise_error() + + +def load_graph(sources: List[BuildSource], manager: BuildManager) -> Graph: + """Given some source files, load the full dependency graph.""" + graph = {} # type: Graph + # The deque is used to implement breadth first traversal. + new = collections.deque() # type: collections.deque[State] + # Seed graph with roots. 
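+    # (Note: these "roots" are the initially given sources; each State also
+    # computes .roots, the list of its parent packages, in add_roots(), and
+    # those get added to the graph in the loop below.)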
+ for bs in sources: + try: + st = State(bs.module, bs.path, bs.text, manager) + except ModuleNotFound: + continue + if st.id in graph: + manager.errors.set_file(st.xpath) + manager.errors.report(1, "Duplicate module named '%s'" % st.id) + manager.errors.raise_error() + graph[st.id] = st + new.append(st) + # Collect dependencies. We go breadth-first. + while new: + st = new.popleft() + for dep in st.roots + st.dependencies: + if dep not in graph: + try: + if dep in st.roots: + # Roots don't have import context. + newst = State(dep, None, None, manager) + else: + newst = State(dep, None, None, manager, st, st.dep_line_map.get(dep, 1)) + except ModuleNotFound: + if dep in st.dependencies: + st.dependencies.remove(dep) + else: + assert newst.id not in graph, newst.id + graph[newst.id] = newst + new.append(newst) + return graph + + +def process_graph(graph: Graph, manager: BuildManager) -> None: + """Process everything in dependency order.""" + sccs = sorted_components(graph) + manager.log("Found %d SCCs; largest has %d nodes" % + (len(sccs), max(len(scc) for scc in sccs))) + for ascc in sccs: + # Sort the SCC's nodes in *reverse* order or encounter. + # This is a heuristic for handling import cycles. + # Note that ascc is a set, and scc is a list. + scc = sorted(ascc, key=lambda id: -graph[id].order) + # If builtins is in the list, move it last. + if 'builtins' in ascc: + scc.remove('builtins') + scc.append('builtins') + # TODO: Do something about mtime ordering. + stale_scc = {id for id in scc if not graph[id].is_fresh()} + fresh = not stale_scc + deps = set() + for id in scc: + deps.update(graph[id].dependencies) + deps -= ascc + stale_deps = {id for id in deps if not graph[id].is_fresh()} + fresh = fresh and not stale_deps + if fresh: + fresh_msg = "fresh" + elif stale_scc: + fresh_msg = "inherently stale (%s)" % " ".join(sorted(stale_scc)) + if stale_deps: + fresh_msg += " with stale deps (%s)" % " ".join(sorted(stale_deps)) + else: + fresh_msg = "stale due to deps (%s)" % " ".join(sorted(stale_deps)) + manager.log("Processing SCC of size %d (%s) as %s" % + (len(scc), " ".join(scc), fresh_msg)) + if fresh: + process_fresh_scc(graph, scc) + else: + process_stale_scc(graph, scc) + + +def process_fresh_scc(graph: Graph, scc: List[str]) -> None: + """Process the modules in one SCC from their cached data.""" + for id in scc: + graph[id].load_tree() + for id in scc: + graph[id].patch_parent() + for id in scc: + graph[id].fix_cross_refs() + for id in scc: + graph[id].calculate_mros() + + +def process_stale_scc(graph: Graph, scc: List[str]) -> None: + """Process the modules in one SCC from source code.""" + for id in scc: + graph[id].clear_fresh() + for id in scc: + # We may already have parsed the module, or not. + # If the former, parse_file() is a no-op. + graph[id].parse_file() + for id in scc: + graph[id].patch_parent() + for id in scc: + graph[id].semantic_analysis() + for id in scc: + graph[id].semantic_analysis_pass_three() + for id in scc: + graph[id].type_check() + graph[id].write_cache() + + +def sorted_components(graph: Graph) -> List[AbstractSet[str]]: + """Return the graph's SCCs, topologically sorted by dependencies.""" + # Compute SCCs. + vertices = set(graph) + edges = {id: st.dependencies for id, st in graph.items()} + sccs = list(strongly_connected_components_path(vertices, edges)) + # Topsort. 
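+    # Map each module id to its SCC, record which SCCs each SCC depends on,
+    # and let topsort() yield an SCC only after all SCCs it depends on.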
+ sccsmap = {id: frozenset(scc) for scc in sccs for id in scc} + data = {} # type: Dict[AbstractSet[str], Set[AbstractSet[str]]] + for scc in sccs: + deps = set() # type: Set[AbstractSet[str]] + for id in scc: + deps.update(sccsmap[x] for x in graph[id].dependencies) + data[frozenset(scc)] = deps + return list(topsort(data)) + + +def strongly_connected_components_path(vertices: Set[str], + edges: Dict[str, List[str]]) -> Iterator[Set[str]]: + """Compute Strongly Connected Components of a graph. + + From http://code.activestate.com/recipes/578507/. + """ + identified = set() # type: Set[str] + stack = [] # type: List[str] + index = {} # type: Dict[str, int] + boundaries = [] # type: List[int] + + def dfs(v: str) -> Iterator[Set[str]]: + index[v] = len(stack) + stack.append(v) + boundaries.append(index[v]) + + for w in edges[v]: + if w not in index: + # For Python >= 3.3, replace with "yield from dfs(w)" + for scc in dfs(w): + yield scc + elif w not in identified: + while index[w] < boundaries[-1]: + boundaries.pop() + + if boundaries[-1] == index[v]: + boundaries.pop() + scc = set(stack[index[v]:]) + del stack[index[v]:] + identified.update(scc) + yield scc + + for v in vertices: + if v not in index: + # For Python >= 3.3, replace with "yield from dfs(v)" + for scc in dfs(v): + yield scc + + +def topsort(data: Dict[AbstractSet[str], Set[AbstractSet[str]]]) -> Iterable[AbstractSet[str]]: + """Topological sort. Consumes its argument. + + From http://code.activestate.com/recipes/577413/. + """ + # TODO: Use a faster algorithm? + for k, v in data.items(): + v.discard(k) # Ignore self dependencies. + for item in set.union(*data.values()) - set(data.keys()): + data[item] = set() + while True: + ready = {item for item, dep in data.items() if not dep} + if not ready: + break + # TODO: Return the items in a reproducible order, or return + # the entire set of items. + for item in ready: + yield item + data = {item: (dep - ready) + for item, dep in data.items() + if item not in ready} + assert not data, "A cyclic dependency exists amongst %r" % data diff --git a/mypy/depmgr.py b/mypy/depmgr.py deleted file mode 100644 index 282aac49927a..000000000000 --- a/mypy/depmgr.py +++ /dev/null @@ -1,637 +0,0 @@ -"""Dependency manager. - -This will replace the dependency management in build.py. - -Design -====== - -Ideally -------- - -A. Collapse cycles (each SCC -- strongly connected component -- - becomes one "supernode"). - -B. Topologically sort nodes based on dependencies. - -C. Process from leaves towards roots. - -Wrinkles --------- - -a. Need to parse source modules to determine dependencies. - -b. Processing order for modules within an SCC. - -c. Must order mtimes of files to decide whether to re-process; depends - on clock never resetting. - -d. from P import M; checks filesystem whether module P.M exists in - filesystem. - -e. Race conditions, where somebody modifies a file while we're - processing. I propose not to modify the algorithm to handle this, - but to detect when this could lead to inconsistencies. (For - example, when we decide on the dependencies based on cache - metadata, and then we decide to re-parse a file because of a stale - dependency, if the re-parsing leads to a different list of - dependencies we should warn the user or start over.) - -Steps ------ - -1. For each explicitly given module find the source file location. - -2. For each such module load and check the cache metadata, and decide - whether it's valid. - -3. 
Now recursively (or iteratively) find dependencies and add those to - the graph: - - - for cached nodes use the list of dependencies from the cache - metadata (this will be valid even if we later end up re-parsing - the same source); - - - for uncached nodes parse the file and process all imports found, - taking care of (a) above. - -Step 3 should also address (d) above. - -Once step 3 terminates we have the entire dependency graph, and for -each module we've either loaded the cache metadata or parsed the -source code. (However, we may still need to parse those modules for -which we have cache metadata but that depend, directly or indirectly, -on at least one module for which the cache metadata is stale.) - -Now we can execute steps A-C from the first section. Finding SCCs for -step A shouldn't be hard; there's a recipe here: -http://code.activestate.com/recipes/578507/. There's also a plethora -of topsort recipes, e.g. http://code.activestate.com/recipes/577413/. - -For single nodes, processing is simple. If the node was cached, we -deserialize the cache data and fix up cross-references. Otherwise, we -do semantic analysis followed by type checking. We also handle (c) -above; if a module has valid cache data *but* any of its -dependendencies was processed from source, then the module should be -processed from source. - -A relatively simple optimization (outside SCCs) we might do in the -future is as follows: if a node's cache data is valid, but one or more -of its dependencies are out of date so we have to re-parse the node -from source, once we have fully type-checked the node, we can decide -whether its symbol table actually changed compared to the cache data -(by reading the cache data and comparing it to the data we would be -writing). If there is no change we can declare the node up to date, -and any node that depends (and for which we have cached data, and -whose other dependencies are up to date) on it won't need to be -re-parsed from source. - -Import cycles -------------- - -Finally we have to decide how to handle (c), import cycles. Here -we'll need a modified version of the original state machine -(build.py), but we only need to do this per SCC, and we won't have to -deal with changes to the list of nodes while we're processing it. - -If all nodes in the SCC have valid cache metadata and all dependencies -outside the SCC are still valid, we can proceed as follows: - - 1. Load cache data for all nodes in the SCC. - - 2. Fix up cross-references for all nodes in the SCC. - -Otherwise, the simplest (but potentially slow) way to proceed is to -invalidate all cache data in the SCC and re-parse all nodes in the SCC -from source. We can do this as follows: - - 1. Parse source for all nodes in the SCC. - - 2. Semantic analysis for all nodes in the SCC. - - 3. Type check all nodes in the SCC. - -(If there are more passes the process is the same -- each pass should -be done for all nodes before starting the next pass for any nodes in -the SCC.) - -We could process the nodes in the SCC in any order. For sentimental -reasons, I've decided to process them in the reverse order in which we -encountered them when originally constructing the graph. That's how -the old build.py deals with cycles, and at least this reproduces the -previous implementation more accurately. - -Can we do better than re-parsing all nodes in the SCC when any of its -dependencies are out of date? It's doubtful. 
The optimization -mentioned at the end of the previous section would require re-parsing -and type-checking a node and then comparing its symbol table to the -cached data; but because the node is part of a cycle we can't -technically type-check it until the semantic analysis of all other -nodes in the cycle has completed. (This is an important issue because -we have a cycle of over 500 modules in the server repo. But I'd like -to deal with it later.) - -Additional wrinkles -------------------- - -During implementation more wrinkles were found. - -- When a submodule of a package (e.g. x.y) is encountered, the parent - package (e.g. x) must also be loaded, but it is not strictly a - dependency. See State.add_roots() below. -""" - -import collections -import contextlib -import json - -from typing import Dict, List, Set, AbstractSet, Tuple, Iterable, Iterator, Optional - -from mypy.build import (BuildManager, BuildSource, CacheMeta, - TYPE_CHECK, - INCREMENTAL, FAST_PARSER, SILENT_IMPORTS, - DUMP_TYPE_STATS, DUMP_INFER_STATS, - find_module, read_with_python_encoding, - find_cache_meta, write_cache) -from mypy.errors import CompileError -from mypy.fixup import fixup_module_pass_one, fixup_module_pass_two -from mypy.nodes import MypyFile, SymbolTableNode, MODULE_REF -from mypy.parse import parse -from mypy.semanal import FirstPass -from mypy.stats import dump_type_stats - - -class ModuleNotFound(Exception): - """Control flow exception to signal that a module was not found.""" - - -class State: - """The state for a module. - - It's a package if path ends in __init__.py[i]. - - The source is only used for the -c command line option; in that - case path is None. Otherwise source is None and path isn't. - """ - - manager = None # type: BuildManager - order_counter = 0 # Class variable - order = None # type: int # Order in which modules were encountered - id = None # type: str # Fully qualified module name - path = None # type: Optional[str] # Path to module source - xpath = None # type: str # Path or '' - source = None # type: Optional[str] # Module source code - meta = None # type: Optional[CacheMeta] - data = None # type: Optional[str] - tree = None # type: Optional[MypyFile] - dependencies = None # type: List[str] - dep_line_map = None # tyoe: Dict[str, int] # Line number where imported - roots = None # type: Optional[List[str]] - import_context = None # type: List[Tuple[str, int]] - caller_state = None # type: Optional[State] - caller_line = 0 - - def __init__(self, - id: Optional[str], - path: Optional[str], - source: Optional[str], - manager: BuildManager, - caller_state: 'State' = None, - caller_line: int = 0, - ) -> None: - assert id or path or source is not None, "Neither id, path nor source given" - self.manager = manager - State.order_counter += 1 - self.order = State.order_counter - self.caller_state = caller_state - self.caller_line = caller_line - if caller_state: - self.import_context = caller_state.import_context[:] - self.import_context.append((caller_state.xpath, caller_line)) - else: - self.import_context = [] - self.id = id or '__main__' - if not path and source is None: - file_id = id - if id == 'builtins' and manager.pyversion[0] == 2: - # The __builtin__ module is called internally by mypy - # 'builtins' in Python 2 mode (similar to Python 3), - # but the stub file is __builtin__.pyi. The reason is - # that a lot of code hard-codes 'builtins.x' and it's - # easier to work it around like this. 
It also means - # that the implementation can mostly ignore the - # difference and just assume 'builtins' everywhere, - # which simplifies code. - file_id = '__builtin__' - path = find_module(file_id, manager.lib_path) - if not path: - # Could not find a module. Typically the reason is a - # misspelled module name, missing stub, module not in - # search path or the module has not been installed. - if self.caller_state: - if not (SILENT_IMPORTS in manager.flags or - (caller_state.tree is not None and - (caller_line in caller_state.tree.ignored_lines or - 'import' in caller_state.tree.weak_opts))): - save_import_context = manager.errors.import_context() - manager.errors.set_import_context(caller_state.import_context) - manager.module_not_found(caller_state.xpath, caller_line, id) - manager.errors.set_import_context(save_import_context) - manager.missing_modules.add(id) - raise ModuleNotFound - else: - # If this is a root it's always fatal. - # TODO: This might hide non-fatal errors from - # roots processed earlier. - raise CompileError(["mypy: can't find module '%s'" % id]) - self.path = path - self.xpath = path or '' - self.source = source - if path and source is None and INCREMENTAL in manager.flags: - self.meta = find_cache_meta(self.id, self.path, manager) - # TODO: Get mtime if not cached. - self.add_roots() - if self.meta: - self.dependencies = self.meta.dependencies - self.dep_line_map = {} - else: - # Parse the file (and then some) to get the dependencies. - self.parse_file() - - def add_roots(self) -> None: - # All parent packages are new roots. - roots = [] - parent = self.id - while '.' in parent: - parent, _ = parent.rsplit('.', 1) - roots.append(parent) - self.roots = roots - - def is_fresh(self) -> bool: - """Return whether the cache data for this file is fresh.""" - return self.meta is not None - - def clear_fresh(self) -> None: - """Throw away the cache data for this file, marking it as stale.""" - self.meta = None - - def check_blockers(self) -> None: - """Raise CompileError if a blocking error is detected.""" - if self.manager.errors.is_blockers(): - self.manager.log("Bailing due to blocking errors") - self.manager.errors.raise_error() - - @contextlib.contextmanager - def wrap_context(self) -> Iterator[None]: - save_import_context = self.manager.errors.import_context() - self.manager.errors.set_import_context(self.import_context) - yield - self.manager.errors.set_import_context(save_import_context) - self.check_blockers() - - # Methods for processing cached modules. - - def load_tree(self) -> None: - with open(self.meta.data_json) as f: - data = json.load(f) - # TODO: Assert data file wasn't changed. - self.tree = MypyFile.deserialize(data) - self.manager.modules[self.id] = self.tree - - def fix_cross_refs(self) -> None: - fixup_module_pass_one(self.tree, self.manager.modules) - - def calculate_mros(self) -> None: - fixup_module_pass_two(self.tree, self.manager.modules) - - # Methods for processing modules from source code. - - def parse_file(self) -> None: - if self.tree is not None: - # The file was already parsed (in __init__()). - return - - manager = self.manager - modules = manager.modules - manager.log("Parsing %s" % self.xpath) - - with self.wrap_context(): - source = self.source - self.source = None # We won't need it again. 
- if self.path and source is None: - try: - source = read_with_python_encoding(self.path, manager.pyversion) - except IOError as ioerr: - raise CompileError([ - "mypy: can't read file '{}': {}".format(self.path, ioerr.strerror)]) - except UnicodeDecodeError as decodeerr: - raise CompileError([ - "mypy: can't decode file '{}': {}".format(self.path, str(decodeerr))]) - self.tree = parse_file(self.id, self.xpath, source, manager) - - modules[self.id] = self.tree - - # Do the first pass of semantic analysis: add top-level - # definitions in the file to the symbol table. We must do - # this before processing imports, since this may mark some - # import statements as unreachable. - first = FirstPass(manager.semantic_analyzer) - first.analyze(self.tree, self.xpath, self.id) - - # Initialize module symbol table, which was populated by the - # semantic analyzer. - # TODO: Why can't FirstPass .analyze() do this? - self.tree.names = manager.semantic_analyzer.globals - - # Compute (direct) dependencies. - # Add all direct imports (this is why we needed the first pass). - # Also keep track of each dependency's source line. - dependencies = [] - dep_line_map = {} - for id, line in manager.all_imported_modules_in_file(self.tree): - # Omit missing modules, as otherwise we could not type-check - # programs with missing modules. - if id == self.id or id in manager.missing_modules: - continue - if id == '': - # Must be from a relative import. - manager.errors.set_file(self.xpath) - manager.errors.report(line, "No parent module -- cannot perform relative import", - blocker=True) - if id not in dep_line_map: - dependencies.append(id) - dep_line_map[id] = line - # Every module implicitly depends on builtins. - if self.id != 'builtins' and 'builtins' not in dependencies: - dependencies.append('builtins') - - # If self.dependencies is already set, it was read from the - # cache, but for some reason we're re-parsing the file. - # Double-check that the dependencies still match (otherwise - # the graph is out of date). - if self.dependencies is not None and dependencies != self.dependencies: - # TODO: Make this into a reasonable error message. - print("HELP!! Dependencies changed!") # Probably the file was edited. - print(" Cached:", self.dependencies) - print(" Source:", dependencies) - self.dependencies = dependencies - self.dep_line_map = dep_line_map - self.check_blockers() - - def patch_parent(self) -> None: - # Include module in the symbol table of the enclosing package. - if '.' not in self.id: - return - manager = self.manager - modules = manager.modules - parent, child = self.id.rsplit('.', 1) - if parent in modules: - manager.trace("Added %s.%s" % (parent, child)) - modules[parent].names[child] = SymbolTableNode(MODULE_REF, self.tree, parent) - else: - manager.log("Hm... 
couldn't add %s.%s" % (parent, child)) - - def semantic_analysis(self) -> None: - with self.wrap_context(): - self.manager.semantic_analyzer.visit_file(self.tree, self.xpath) - - def semantic_analysis_pass_three(self) -> None: - with self.wrap_context(): - self.manager.semantic_analyzer_pass3.visit_file(self.tree, self.xpath) - if DUMP_TYPE_STATS in self.manager.flags: - dump_type_stats(self.tree, self.xpath) - - def type_check(self) -> None: - manager = self.manager - if manager.target < TYPE_CHECK: - return - with self.wrap_context(): - manager.type_checker.visit_file(self.tree, self.xpath) - type_map = manager.type_checker.type_map - if DUMP_INFER_STATS in manager.flags: - dump_type_stats(self.tree, self.xpath, inferred=True, typemap=type_map) - manager.reports.file(self.tree, type_map=type_map) - - def write_cache(self) -> None: - if self.path and INCREMENTAL in self.manager.flags and not self.manager.errors.is_errors(): - write_cache(self.id, self.path, self.tree, list(self.dependencies), self.manager) - - -# TODO: This would make a nice method on BuildManager. -def parse_file(id: str, path: str, source: str, manager: BuildManager) -> MypyFile: - """Parse the source of a file with the given name. - - Raise CompileError if there is a parse error. - """ - errors = manager.errors - num_errs = errors.num_messages() - tree = parse(source, path, errors, - pyversion=manager.pyversion, - custom_typing_module=manager.custom_typing_module, - implicit_any=manager.implicit_any, - fast_parser=FAST_PARSER in manager.flags) - tree._fullname = id - if errors.num_messages() != num_errs: - manager.log("Bailing due to parse errors") - errors.raise_error() - return tree - - -Graph = Dict[str, State] - - -def dispatch(sources: List[BuildSource], manager: BuildManager) -> None: - manager.log("Using new dependency manager") - graph = load_graph(sources, manager) - manager.log("Loaded graph with %d nodes" % len(graph)) - process_graph(graph, manager) - if manager.errors.is_errors(): - manager.log("Found %d errors (before de-duping)" % manager.errors.num_messages()) - manager.errors.raise_error() - - -def load_graph(sources: List[BuildSource], manager: BuildManager) -> Graph: - """Given some source files, load the full dependency graph.""" - graph = {} # type: Graph - # The deque is used to implement breadth first traversal. - new = collections.deque() # type: collections.deque[State] - # Seed graph with roots. - for bs in sources: - try: - st = State(bs.module, bs.path, bs.text, manager) - except ModuleNotFound: - continue - if st.id in graph: - manager.errors.set_file(st.xpath) - manager.errors.report(1, "Duplicate module named '%s'" % st.id) - manager.errors.raise_error() - graph[st.id] = st - new.append(st) - # Collect dependencies. We go breadth-first. - while new: - st = new.popleft() - for dep in st.roots + st.dependencies: - if dep not in graph: - try: - if dep in st.roots: - # Roots don't have import context. 
- newst = State(dep, None, None, manager) - else: - newst = State(dep, None, None, manager, st, st.dep_line_map.get(dep, 1)) - except ModuleNotFound: - if dep in st.dependencies: - st.dependencies.remove(dep) - else: - assert newst.id not in graph, newst.id - graph[newst.id] = newst - new.append(newst) - return graph - - -def process_graph(graph: Graph, manager: BuildManager) -> None: - """Process everything in dependency order.""" - sccs = sorted_components(graph) - manager.log("Found %d SCCs; largest has %d nodes" % - (len(sccs), max(len(scc) for scc in sccs))) - for ascc in sccs: - # Sort the SCC's nodes in *reverse* order or encounter. - # This is a heuristic for handling import cycles. - # Note that ascc is a set, and scc is a list. - scc = sorted(ascc, key=lambda id: -graph[id].order) - # If builtins is in the list, move it last. - if 'builtins' in ascc: - scc.remove('builtins') - scc.append('builtins') - # TODO: Do something about mtime ordering. - stale_scc = {id for id in scc if not graph[id].is_fresh()} - fresh = not stale_scc - deps = set() - for id in scc: - deps.update(graph[id].dependencies) - deps -= ascc - stale_deps = {id for id in deps if not graph[id].is_fresh()} - fresh = fresh and not stale_deps - if fresh: - fresh_msg = "fresh" - elif stale_scc: - fresh_msg = "inherently stale (%s)" % " ".join(sorted(stale_scc)) - if stale_deps: - fresh_msg += " with stale deps (%s)" % " ".join(sorted(stale_deps)) - else: - fresh_msg = "stale due to deps (%s)" % " ".join(sorted(stale_deps)) - manager.log("Processing SCC of size %d (%s) as %s" % - (len(scc), " ".join(scc), fresh_msg)) - if fresh: - process_fresh_scc(graph, scc) - else: - process_stale_scc(graph, scc) - - -def process_fresh_scc(graph: Graph, scc: List[str]) -> None: - """Process the modules in one SCC from their cached data.""" - for id in scc: - graph[id].load_tree() - for id in scc: - graph[id].patch_parent() - for id in scc: - graph[id].fix_cross_refs() - for id in scc: - graph[id].calculate_mros() - - -def process_stale_scc(graph: Graph, scc: List[str]) -> None: - """Process the modules in one SCC from source code.""" - for id in scc: - graph[id].clear_fresh() - for id in scc: - # We may already have parsed the module, or not. - # If the former, parse_file() is a no-op. - graph[id].parse_file() - for id in scc: - graph[id].patch_parent() - for id in scc: - graph[id].semantic_analysis() - for id in scc: - graph[id].semantic_analysis_pass_three() - for id in scc: - graph[id].type_check() - graph[id].write_cache() - - -def sorted_components(graph: Graph) -> List[AbstractSet[str]]: - """Return the graph's SCCs, topologically sorted by dependencies.""" - # Compute SCCs. - vertices = set(graph) - edges = {id: st.dependencies for id, st in graph.items()} - sccs = list(strongly_connected_components_path(vertices, edges)) - # Topsort. - sccsmap = {id: frozenset(scc) for scc in sccs for id in scc} - data = {} # type: Dict[AbstractSet[str], Set[AbstractSet[str]]] - for scc in sccs: - deps = set() # type: Set[AbstractSet[str]] - for id in scc: - deps.update(sccsmap[x] for x in graph[id].dependencies) - data[frozenset(scc)] = deps - return list(topsort(data)) - - -def strongly_connected_components_path(vertices: Set[str], - edges: Dict[str, List[str]]) -> Iterator[Set[str]]: - """Compute Strongly Connected Components of a graph. - - From http://code.activestate.com/recipes/578507/. 
- """ - identified = set() # type: Set[str] - stack = [] # type: List[str] - index = {} # type: Dict[str, int] - boundaries = [] # type: List[int] - - def dfs(v: str) -> Iterator[Set[str]]: - index[v] = len(stack) - stack.append(v) - boundaries.append(index[v]) - - for w in edges[v]: - if w not in index: - # For Python >= 3.3, replace with "yield from dfs(w)" - for scc in dfs(w): - yield scc - elif w not in identified: - while index[w] < boundaries[-1]: - boundaries.pop() - - if boundaries[-1] == index[v]: - boundaries.pop() - scc = set(stack[index[v]:]) - del stack[index[v]:] - identified.update(scc) - yield scc - - for v in vertices: - if v not in index: - # For Python >= 3.3, replace with "yield from dfs(v)" - for scc in dfs(v): - yield scc - - -def topsort(data: Dict[AbstractSet[str], Set[AbstractSet[str]]]) -> Iterable[AbstractSet[str]]: - """Topological sort. Consumes its argument. - - From http://code.activestate.com/recipes/577413/. - """ - # TODO: Use a faster algorithm? - for k, v in data.items(): - v.discard(k) # Ignore self dependencies. - for item in set.union(*data.values()) - set(data.keys()): - data[item] = set() - while True: - ready = {item for item, dep in data.items() if not dep} - if not ready: - break - # TODO: Return the items in a reproducible order, or return - # the entire set of items. - for item in ready: - yield item - data = {item: (dep - ready) - for item, dep in data.items() - if item not in ready} - assert not data, "A cyclic dependency exists amongst %r" % data From 6b307c449ddcea98328eca4e02dded3f27f438af Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Sat, 12 Mar 2016 18:28:44 -0800 Subject: [PATCH 080/117] Make parse_file() a BuildManager method. --- mypy/build.py | 39 ++++++++++++++++++--------------------- 1 file changed, 18 insertions(+), 21 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index ad756671d3b5..4d760caa6569 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -378,6 +378,23 @@ def is_module(self, id: str) -> bool: """Is there a file in the file system corresponding to module id?""" return find_module(id, self.lib_path) is not None + def parse_file(self, id: str, path: str, source: str) -> MypyFile: + """Parse the source of a file with the given name. + + Raise CompileError if there is a parse error. + """ + num_errs = self.errors.num_messages() + tree = parse(source, path, self.errors, + pyversion=self.pyversion, + custom_typing_module=self.custom_typing_module, + implicit_any=self.implicit_any, + fast_parser=FAST_PARSER in self.flags) + tree._fullname = id + if self.errors.num_messages() != num_errs: + self.log("Bailing due to parse errors") + self.errors.raise_error() + return tree + def module_not_found(self, path: str, line: int, id: str) -> None: self.errors.set_file(path) stub_msg = "(Stub files are from https://github.com/python/typeshed)" @@ -1007,7 +1024,7 @@ def parse_file(self) -> None: except UnicodeDecodeError as decodeerr: raise CompileError([ "mypy: can't decode file '{}': {}".format(self.path, str(decodeerr))]) - self.tree = parse_file(self.id, self.xpath, source, manager) + self.tree = manager.parse_file(self.id, self.xpath, source) modules[self.id] = self.tree @@ -1097,26 +1114,6 @@ def write_cache(self) -> None: write_cache(self.id, self.path, self.tree, list(self.dependencies), self.manager) -# TODO: This would make a nice method on BuildManager. -def parse_file(id: str, path: str, source: str, manager: BuildManager) -> MypyFile: - """Parse the source of a file with the given name. 
- - Raise CompileError if there is a parse error. - """ - errors = manager.errors - num_errs = errors.num_messages() - tree = parse(source, path, errors, - pyversion=manager.pyversion, - custom_typing_module=manager.custom_typing_module, - implicit_any=manager.implicit_any, - fast_parser=FAST_PARSER in manager.flags) - tree._fullname = id - if errors.num_messages() != num_errs: - manager.log("Bailing due to parse errors") - errors.raise_error() - return tree - - Graph = Dict[str, State] From 4b0657691deb7099b8a1455377305c81f41a3968 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Sat, 12 Mar 2016 19:17:46 -0800 Subject: [PATCH 081/117] Roots are dependencies after all. But they're still special. --- mypy/build.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index 4d760caa6569..516c738d95d7 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -1043,7 +1043,7 @@ def parse_file(self) -> None: # Compute (direct) dependencies. # Add all direct imports (this is why we needed the first pass). # Also keep track of each dependency's source line. - dependencies = [] + dependencies = self.roots[:] dep_line_map = {} # type: Dict[str, int] # id -> line for id, line in manager.all_imported_modules_in_file(self.tree): # Omit missing modules, as otherwise we could not type-check @@ -1059,7 +1059,7 @@ def parse_file(self) -> None: dependencies.append(id) dep_line_map[id] = line # Every module implicitly depends on builtins. - if self.id != 'builtins' and 'builtins' not in dependencies: + if self.id != 'builtins' and 'builtins' not in dep_line_map: dependencies.append('builtins') # If self.dependencies is already set, it was read from the From 182472e27c362dc8e5ea40d6bc72a0682a0aaedb Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Sat, 12 Mar 2016 20:05:50 -0800 Subject: [PATCH 082/117] Implement --silent-imports. --- mypy/build.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mypy/build.py b/mypy/build.py index 516c738d95d7..3ac7f5fe73af 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -921,6 +921,11 @@ def __init__(self, # which simplifies code. file_id = '__builtin__' path = find_module(file_id, manager.lib_path) + if (path and SILENT_IMPORTS in manager.flags and + path.endswith('.py') and caller_state): + # In silent mode, for a non-root, don't load .py files. + # (This will still load a parent package's __init__.py.) + path = None if not path: # Could not find a module. Typically the reason is a # misspelled module name, missing stub, module not in From 8c56b3894f9e13fb14de672e6afd55c4274ead39 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Sun, 13 Mar 2016 10:38:48 -0700 Subject: [PATCH 083/117] Compare cache file mtimes. --- mypy/build.py | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index 3ac7f5fe73af..bb954cee3a93 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -1184,7 +1184,6 @@ def process_graph(graph: Graph, manager: BuildManager) -> None: if 'builtins' in ascc: scc.remove('builtins') scc.append('builtins') - # TODO: Do something about mtime ordering. stale_scc = {id for id in scc if not graph[id].is_fresh()} fresh = not stale_scc deps = set() @@ -1194,7 +1193,30 @@ def process_graph(graph: Graph, manager: BuildManager) -> None: stale_deps = {id for id in deps if not graph[id].is_fresh()} fresh = fresh and not stale_deps if fresh: - fresh_msg = "fresh" + # All cache files are fresh. 
Check that no dependency's + # cache file is newer than any scc node's cache file. + oldest_in_scc = min(graph[id].meta.data_mtime for id in scc) + newest_in_deps = 0 if not deps else max(graph[dep].meta.data_mtime for dep in deps) + if manager.flags.count(VERBOSE) >= 2: # Dump all mtimes for extreme debugging. + all_ids = sorted(ascc | deps, key=lambda id: graph[id].meta.data_mtime) + for id in all_ids: + if id in scc: + if graph[id].meta.data_mtime < newest_in_deps: + key = "*id:" + else: + key = "id:" + else: + if graph[id].meta.data_mtime > oldest_in_scc: + key = "+dep:" + else: + key = "dep:" + manager.trace(" %5s %.0f %s" % (key, graph[id].meta.data_mtime, id)) + # If equal, give the benefit of the doubt, due to 1-sec time granularity. + if oldest_in_scc < newest_in_deps: + fresh = False + fresh_msg = "out of date by %.0f seconds" % (newest_in_deps - oldest_in_scc) + else: + fresh_msg = "fresh" elif stale_scc: fresh_msg = "inherently stale (%s)" % " ".join(sorted(stale_scc)) if stale_deps: From 0273c0509b47975d9175bd41b0091a45decbf049 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Sun, 13 Mar 2016 16:45:45 -0700 Subject: [PATCH 084/117] Well... roots are *not* dependencies after all. It broke typeshed testing. --- mypy/build.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mypy/build.py b/mypy/build.py index bb954cee3a93..5127a7acbe27 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -1048,7 +1048,7 @@ def parse_file(self) -> None: # Compute (direct) dependencies. # Add all direct imports (this is why we needed the first pass). # Also keep track of each dependency's source line. - dependencies = self.roots[:] + dependencies = [] dep_line_map = {} # type: Dict[str, int] # id -> line for id, line in manager.all_imported_modules_in_file(self.tree): # Omit missing modules, as otherwise we could not type-check From 123c340b4ca19b43140d414dfdf22504c6ddf216 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Sun, 13 Mar 2016 17:34:08 -0700 Subject: [PATCH 085/117] remove_cwd_prefix_from_path() should check for __init__.pyi. --- mypy/build.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mypy/build.py b/mypy/build.py index 5127a7acbe27..0ffcc3a03e8b 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -434,7 +434,9 @@ def remove_cwd_prefix_from_path(p: str) -> str: if basename(cur) != '': cur += os.sep # Compute root path. - while p and os.path.isfile(os.path.join(p, '__init__.py')): + while (p and + (os.path.isfile(os.path.join(p, '__init__.py')) or + os.path.isfile(os.path.join(p, '__init__.pyi')))): dir, base = os.path.split(p) if not base: break From 80622d71fd82be32503414416db3676b9023869e Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Mon, 14 Mar 2016 07:08:36 -0700 Subject: [PATCH 086/117] Rename roots to ancestors. Fix --silent for them. --- mypy/build.py | 50 ++++++++++++++++++++++++++------------------------ 1 file changed, 26 insertions(+), 24 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index 0ffcc3a03e8b..ef542e3d3919 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -856,7 +856,7 @@ def write_cache(id: str, path: str, tree: MypyFile, dependencies: List[str], - When a submodule of a package (e.g. x.y) is encountered, the parent package (e.g. x) must also be loaded, but it is not strictly a - dependency. See State.add_roots() below. + dependency. See State.add_ancestors() below. """ @@ -867,8 +867,6 @@ class ModuleNotFound(Exception): class State: """The state for a module. 
- It's a package if path ends in __init__.py[i]. - The source is only used for the -c command line option; in that case path is None. Otherwise source is None and path isn't. """ @@ -885,7 +883,7 @@ class State: tree = None # type: Optional[MypyFile] dependencies = None # type: List[str] dep_line_map = None # tyoe: Dict[str, int] # Line number where imported - roots = None # type: Optional[List[str]] + ancestors = None # type: Optional[List[str]] import_context = None # type: List[Tuple[str, int]] caller_state = None # type: Optional[State] caller_line = 0 @@ -897,6 +895,7 @@ def __init__(self, manager: BuildManager, caller_state: 'State' = None, caller_line: int = 0, + is_ancestor: bool = False, ) -> None: assert id or path or source is not None, "Neither id, path nor source given" self.manager = manager @@ -923,16 +922,18 @@ def __init__(self, # which simplifies code. file_id = '__builtin__' path = find_module(file_id, manager.lib_path) - if (path and SILENT_IMPORTS in manager.flags and - path.endswith('.py') and caller_state): - # In silent mode, for a non-root, don't load .py files. - # (This will still load a parent package's __init__.py.) - path = None - if not path: + if path: + # In silent mode, don't import .py files. + if (SILENT_IMPORTS in manager.flags and + path.endswith('.py') and (caller_state or is_ancestor)): + path = None + manager.missing_modules.add(id) + raise ModuleNotFound + else: # Could not find a module. Typically the reason is a # misspelled module name, missing stub, module not in # search path or the module has not been installed. - if self.caller_state: + if caller_state: if not (SILENT_IMPORTS in manager.flags or (caller_state.tree is not None and (caller_line in caller_state.tree.ignored_lines or @@ -944,9 +945,9 @@ def __init__(self, manager.missing_modules.add(id) raise ModuleNotFound else: - # If this is a root it's always fatal. + # If we can't find a root source it's always fatal. # TODO: This might hide non-fatal errors from - # roots processed earlier. + # root sources processed earlier. raise CompileError(["mypy: can't find module '%s'" % id]) self.path = path self.xpath = path or '' @@ -954,7 +955,7 @@ def __init__(self, if path and source is None and INCREMENTAL in manager.flags: self.meta = find_cache_meta(self.id, self.path, manager) # TODO: Get mtime if not cached. - self.add_roots() + self.add_ancestors() if self.meta: self.dependencies = self.meta.dependencies self.dep_line_map = {} @@ -962,14 +963,14 @@ def __init__(self, # Parse the file (and then some) to get the dependencies. self.parse_file() - def add_roots(self) -> None: - # All parent packages are new roots. - roots = [] + def add_ancestors(self) -> None: + # All parent packages are new ancestors. + ancestors = [] parent = self.id while '.' in parent: parent, _ = parent.rsplit('.', 1) - roots.append(parent) - self.roots = roots + ancestors.append(parent) + self.ancestors = ancestors def is_fresh(self) -> bool: """Return whether the cache data for this file is fresh.""" @@ -1139,7 +1140,7 @@ def load_graph(sources: List[BuildSource], manager: BuildManager) -> Graph: graph = {} # type: Graph # The deque is used to implement breadth first traversal. new = collections.deque() # type: collections.deque[State] - # Seed graph with roots. + # Seed the graph with the initial root sources. for bs in sources: try: st = State(bs.module, bs.path, bs.text, manager) @@ -1154,12 +1155,13 @@ def load_graph(sources: List[BuildSource], manager: BuildManager) -> Graph: # Collect dependencies. 
We go breadth-first. while new: st = new.popleft() - for dep in st.roots + st.dependencies: + for dep in st.ancestors + st.dependencies: if dep not in graph: try: - if dep in st.roots: - # Roots don't have import context. - newst = State(dep, None, None, manager) + if dep in st.ancestors: + # TODO: Why not 'if dep not in st.dependencies' ? + # Ancestors don't have import context. + newst = State(dep, None, None, manager, is_ancestor=True) else: newst = State(dep, None, None, manager, st, st.dep_line_map.get(dep, 1)) except ModuleNotFound: From d35e476ec2d79557ebb1ee19678c01ed9f235a18 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Mon, 14 Mar 2016 10:13:14 -0700 Subject: [PATCH 087/117] Improve SCC logging. Fix indentation. --- mypy/build.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index ef542e3d3919..e84e158736b2 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -925,7 +925,7 @@ def __init__(self, if path: # In silent mode, don't import .py files. if (SILENT_IMPORTS in manager.flags and - path.endswith('.py') and (caller_state or is_ancestor)): + path.endswith('.py') and (caller_state or is_ancestor)): path = None manager.missing_modules.add(id) raise ModuleNotFound @@ -1227,8 +1227,11 @@ def process_graph(graph: Graph, manager: BuildManager) -> None: fresh_msg += " with stale deps (%s)" % " ".join(sorted(stale_deps)) else: fresh_msg = "stale due to deps (%s)" % " ".join(sorted(stale_deps)) - manager.log("Processing SCC of size %d (%s) as %s" % - (len(scc), " ".join(scc), fresh_msg)) + if len(scc) == 1: + manager.log("Processing SCC sigleton (%s) as %s" % (" ".join(scc), fresh_msg)) + else: + manager.log("Processing SCC of size %d (%s) as %s" % + (len(scc), " ".join(scc), fresh_msg)) if fresh: process_fresh_scc(graph, scc) else: From e704e0c96d32ce3714d2bde859997a94a4878612 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Mon, 14 Mar 2016 11:08:48 -0700 Subject: [PATCH 088/117] Make sorted_components() work for subgraphs too. --- mypy/build.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index e84e158736b2..5c9e0d6b4eba 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -1270,10 +1270,15 @@ def process_stale_scc(graph: Graph, scc: List[str]) -> None: def sorted_components(graph: Graph) -> List[AbstractSet[str]]: - """Return the graph's SCCs, topologically sorted by dependencies.""" + """Return the graph's SCCs, topologically sorted by dependencies. + + This works for a subset of the full dependency graph too; + dependencies that aren't present in graph.keys() are ignored. + """ # Compute SCCs. vertices = set(graph) - edges = {id: st.dependencies for id, st in graph.items()} + edges = {id: [dep for dep in st.dependencies if dep in graph] + for id, st in graph.items()} sccs = list(strongly_connected_components_path(vertices, edges)) # Topsort. sccsmap = {id: frozenset(scc) for scc in sccs for id in scc} @@ -1281,7 +1286,7 @@ def sorted_components(graph: Graph) -> List[AbstractSet[str]]: for scc in sccs: deps = set() # type: Set[AbstractSet[str]] for id in scc: - deps.update(sccsmap[x] for x in graph[id].dependencies) + deps.update(sccsmap[x] for x in graph[id].dependencies if x in graph) data[frozenset(scc)] = deps return list(topsort(data)) From ede81849a636e88d4f048472bc290224aa0cc7a0 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Mon, 14 Mar 2016 11:09:18 -0700 Subject: [PATCH 089/117] Horrible hack to deal with a nasty cycle in sqlalchemy.sql. 
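
The hack is confined to process_graph(): when an SCC's node set equals
SQLALCHEMY_HACK_SET, the default reverse-encounter-order heuristic is
replaced by the hand-written SQLALCHEMY_HACK list, whose one essential
property is that sqlalchemy.sql.base is processed before
sqlalchemy.sql.schema.  Roughly:

    if ascc == SQLALCHEMY_HACK_SET:
        scc = SQLALCHEMY_HACK  # hand-picked within-cycle order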
--- mypy/build.py | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/mypy/build.py b/mypy/build.py index 5c9e0d6b4eba..6b90dd0d97a3 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -1174,6 +1174,29 @@ def load_graph(sources: List[BuildSource], manager: BuildManager) -> Graph: return graph +# The key property of the ordering here that we really need is that +# .sql.base is processed before .sql.schema. +# TODO: We shouldn't need this special case. +SQLALCHEMY_HACK = [ + 'sqlalchemy.sql.base', + 'sqlalchemy.sql.type_api', + 'sqlalchemy.sql.elements', + 'sqlalchemy.sql.sqltypes', + 'sqlalchemy.sql.ddl', + 'sqlalchemy.sql.selectable', + 'sqlalchemy.sql.schema', + 'sqlalchemy.sql.functions', + 'sqlalchemy.sql.dml', + 'sqlalchemy.sql.expression', + 'sqlalchemy.sql', + 'sqlalchemy.pool', + 'sqlalchemy.schema', + 'sqlalchemy.types', + 'sqlalchemy', +] +SQLALCHEMY_HACK_SET = frozenset(SQLALCHEMY_HACK) + + def process_graph(graph: Graph, manager: BuildManager) -> None: """Process everything in dependency order.""" sccs = sorted_components(graph) @@ -1188,6 +1211,23 @@ def process_graph(graph: Graph, manager: BuildManager) -> None: if 'builtins' in ascc: scc.remove('builtins') scc.append('builtins') + elif ascc == SQLALCHEMY_HACK_SET: + # TODO: This is a really gross hack to deal with the + # unfortunate reality that the scqlalchemy package + # contains a cycle where our usual approach isn't enough. + # There is probably a better way to handle it but I need a + # break. For example, if you look at each subpackage, + # *within* the subpackage there's a clear ordering, and we + # could handle that by running the SCC+topsort algorithm + # over that subgraph. + manager.log("Replacing %s with %s" % (scc, SQLALCHEMY_HACK_SET)) + scc = SQLALCHEMY_HACK + # Here's how I came up with the ordering in SQLALCHEMY_HACK: + # sub_graph = {id: graph[id] + # for id in scc if id.startswith('sqlalchemy.sql.')} + # sub_sccs = sorted_components(sub_graph) + # for sub_scc in sub_sccs: + # print(sub_scc) stale_scc = {id for id in scc if not graph[id].is_fresh()} fresh = not stale_scc deps = set() From 7f9fc4319d1ad736824bea4d34df4cf24ed76ae0 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Mon, 14 Mar 2016 11:10:06 -0700 Subject: [PATCH 090/117] Sync typeshed --- typeshed | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/typeshed b/typeshed index a946304d497c..540a9e18f7cb 160000 --- a/typeshed +++ b/typeshed @@ -1 +1 @@ -Subproject commit a946304d497c64920df65855e4d651aed1378b1e +Subproject commit 540a9e18f7cbb99dc6353bf1141a1845f97fecdc From eab8a821880b6dac95dc37ebcf443495c0ec2670 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 15 Mar 2016 10:46:11 -0700 Subject: [PATCH 091/117] Sync typeshed --- typeshed | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/typeshed b/typeshed index 540a9e18f7cb..6dcb32c7d904 160000 --- a/typeshed +++ b/typeshed @@ -1 +1 @@ -Subproject commit 540a9e18f7cbb99dc6353bf1141a1845f97fecdc +Subproject commit 6dcb32c7d904935a6cf2db0bdc30cdb15eb733a5 From 46c20760fa455ea67b6b099175be28a653842d9c Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 16 Mar 2016 14:47:51 -0700 Subject: [PATCH 092/117] Stabilize return order of topsort and avoid depending on p for "from p import m". Here p is a package and m is a submodule. This allows me to kill the sqlalchemy-specific hack. 
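Roughly, the new rule for dependency extraction is: for "from p import m", if every imported name turns out to be a submodule of p, only those submodules are recorded as dependencies; if at least one imported name is not a submodule, p itself is also recorded, inserted before the submodules. A minimal stand-alone sketch of that rule follows -- KNOWN_MODULES, is_module() and deps_for_import_from() are illustrative stand-ins, not the real build.py code shown in the diff below:

    KNOWN_MODULES = {'p', 'p.m', 'p.n'}

    def is_module(module_id):
        # Stand-in for the build manager's is_module() check.
        return module_id in KNOWN_MODULES

    def deps_for_import_from(package, names, line):
        res = []          # in the real code this already holds earlier imports
        pos = len(res)
        all_are_submodules = True
        for name in names:
            sub_id = package + '.' + name
            if is_module(sub_id):
                res.append((sub_id, line))
            else:
                all_are_submodules = False
        # Depend on the package itself only if some imported name is not a
        # submodule, and put it *before* the submodules in that case.
        if not all_are_submodules:
            res.insert(pos, (package, line))
        return res

    print(deps_for_import_from('p', ['m'], 1))       # [('p.m', 1)]
    print(deps_for_import_from('p', ['m', 'f'], 2))  # [('p', 2), ('p.m', 2)]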
--- mypy/build.py | 74 +++++++++++++++++++-------------------------------- 1 file changed, 27 insertions(+), 47 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index 6b90dd0d97a3..d8c3461f5a87 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -364,12 +364,22 @@ def correct_rel_imp(imp: Union[ImportFrom, ImportAll]) -> str: res.append((id, imp.line)) elif isinstance(imp, ImportFrom): cur_id = correct_rel_imp(imp) - res.append((cur_id, imp.line)) + pos = len(res) + all_are_submodules = True # Also add any imported names that are submodules. for name, __ in imp.names: sub_id = cur_id + '.' + name if self.is_module(sub_id): res.append((sub_id, imp.line)) + else: + all_are_submodules = False + # If all imported names are submodules, don't add + # cur_id as a dependency. Otherwise (i.e., if at + # least one imported name isn't a submodule) + # cur_id is also a dependency, and we should + # insert it *before* any submodules. + if not all_are_submodules: + res.insert(pos, ((cur_id, imp.line))) elif isinstance(imp, ImportAll): res.append((correct_rel_imp(imp), imp.line)) return res @@ -1174,29 +1184,6 @@ def load_graph(sources: List[BuildSource], manager: BuildManager) -> Graph: return graph -# The key property of the ordering here that we really need is that -# .sql.base is processed before .sql.schema. -# TODO: We shouldn't need this special case. -SQLALCHEMY_HACK = [ - 'sqlalchemy.sql.base', - 'sqlalchemy.sql.type_api', - 'sqlalchemy.sql.elements', - 'sqlalchemy.sql.sqltypes', - 'sqlalchemy.sql.ddl', - 'sqlalchemy.sql.selectable', - 'sqlalchemy.sql.schema', - 'sqlalchemy.sql.functions', - 'sqlalchemy.sql.dml', - 'sqlalchemy.sql.expression', - 'sqlalchemy.sql', - 'sqlalchemy.pool', - 'sqlalchemy.schema', - 'sqlalchemy.types', - 'sqlalchemy', -] -SQLALCHEMY_HACK_SET = frozenset(SQLALCHEMY_HACK) - - def process_graph(graph: Graph, manager: BuildManager) -> None: """Process everything in dependency order.""" sccs = sorted_components(graph) @@ -1211,23 +1198,6 @@ def process_graph(graph: Graph, manager: BuildManager) -> None: if 'builtins' in ascc: scc.remove('builtins') scc.append('builtins') - elif ascc == SQLALCHEMY_HACK_SET: - # TODO: This is a really gross hack to deal with the - # unfortunate reality that the scqlalchemy package - # contains a cycle where our usual approach isn't enough. - # There is probably a better way to handle it but I need a - # break. For example, if you look at each subpackage, - # *within* the subpackage there's a clear ordering, and we - # could handle that by running the SCC+topsort algorithm - # over that subgraph. - manager.log("Replacing %s with %s" % (scc, SQLALCHEMY_HACK_SET)) - scc = SQLALCHEMY_HACK - # Here's how I came up with the ordering in SQLALCHEMY_HACK: - # sub_graph = {id: graph[id] - # for id in scc if id.startswith('sqlalchemy.sql.')} - # sub_sccs = sorted_components(sub_graph) - # for sub_scc in sub_sccs: - # print(sub_scc) stale_scc = {id for id in scc if not graph[id].is_fresh()} fresh = not stale_scc deps = set() @@ -1328,7 +1298,19 @@ def sorted_components(graph: Graph) -> List[AbstractSet[str]]: for id in scc: deps.update(sccsmap[x] for x in graph[id].dependencies if x in graph) data[frozenset(scc)] = deps - return list(topsort(data)) + res = [] + for ready in topsort(data): + # Sort the sets in ready by reversed smallest State.order. Exampes: + # + # - If ready is [{x}, {y}], x.order == 1, y.order == 2, we get + # [{y}, {x}]. 
+ # + # - If ready is [{a, b}, {c, d}], a.order == 1, b.order == 3, + # c.order == 2, d.order == 4, the sort keys become [1, 2] + # and the result is [{c, d}, {a, b}]. + res.extend(sorted(ready, + key=lambda scc: -min(graph[id].order for id in scc))) + return res def strongly_connected_components_path(vertices: Set[str], @@ -1370,7 +1352,8 @@ def dfs(v: str) -> Iterator[Set[str]]: yield scc -def topsort(data: Dict[AbstractSet[str], Set[AbstractSet[str]]]) -> Iterable[AbstractSet[str]]: +def topsort(data: Dict[AbstractSet[str], + Set[AbstractSet[str]]]) -> Iterable[Set[AbstractSet[str]]]: """Topological sort. Consumes its argument. From http://code.activestate.com/recipes/577413/. @@ -1384,10 +1367,7 @@ def topsort(data: Dict[AbstractSet[str], Set[AbstractSet[str]]]) -> Iterable[Abs ready = {item for item, dep in data.items() if not dep} if not ready: break - # TODO: Return the items in a reproducible order, or return - # the entire set of items. - for item in ready: - yield item + yield ready data = {item: (dep - ready) for item, dep in data.items() if item not in ready} From 06c701cde63615ddfdfd7d8cf60b5cf5daa65fe1 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Thu, 17 Mar 2016 16:41:55 -0700 Subject: [PATCH 093/117] Sync typeshed (pickle). --- typeshed | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/typeshed b/typeshed index 6dcb32c7d904..1c03574ac49a 160000 --- a/typeshed +++ b/typeshed @@ -1 +1 @@ -Subproject commit 6dcb32c7d904935a6cf2db0bdc30cdb15eb733a5 +Subproject commit 1c03574ac49a1fa1ca0c7e8243a91e2fb6b907ff From 5a4e6d848b0926232b57236d6ba247a37981f964 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 22 Mar 2016 14:16:19 -0700 Subject: [PATCH 094/117] WIP: attempt to give classmethod first arg a reasonable type. Fixes #292 --- mypy/semanal.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/mypy/semanal.py b/mypy/semanal.py index a68f31d8c92f..3f8628f2e033 100644 --- a/mypy/semanal.py +++ b/mypy/semanal.py @@ -306,9 +306,7 @@ def prepare_method_signature(self, func: FuncDef) -> None: self.fail('Method must have at least one argument', func) elif func.type: sig = cast(FunctionLike, func.type) - # TODO: A classmethod's first argument should be more - # precisely typed than Any. - leading_type = AnyType() if func.is_class else self_type(self.type) + leading_type = self.class_subtype(self.type) if func.is_class else self_type(self.type) func.type = replace_implicit_first_type(sig, leading_type) def is_conditional_func(self, previous: Node, new: FuncDef) -> bool: @@ -808,6 +806,16 @@ def analyze_metaclass(self, defn: ClassDef) -> None: def object_type(self) -> Instance: return self.named_type('__builtins__.object') + def class_subtype(self, info: TypeInfo) -> Type: + # Construct a function type whose fallback is cls. + from mypy import checkmember # To avoid import cycle. + leading_type = checkmember.type_object_type(info, self.builtin_type) + if isinstance(leading_type, Overloaded): + # Overloaded __init__ is too complex to handle. Plus it's stubs only. 
+ return AnyType() + else: + return leading_type + def named_type(self, qualified_name: str, args: List[Type] = None) -> Instance: sym = self.lookup_qualified(qualified_name, None) return Instance(cast(TypeInfo, sym.node), args or []) From c45a1f1ca12f9d148dd9406d8de105466df5006c Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 22 Mar 2016 17:27:20 -0700 Subject: [PATCH 095/117] Fix TypeAnalyser.visit_callable_type() to preserve fallback if present (found by Jukka). --- mypy/typeanal.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mypy/typeanal.py b/mypy/typeanal.py index 1bf0a6338e4b..f7cfd8ba10fb 100644 --- a/mypy/typeanal.py +++ b/mypy/typeanal.py @@ -179,7 +179,7 @@ def visit_type_var(self, t: TypeVarType) -> Type: def visit_callable_type(self, t: CallableType) -> Type: return t.copy_modified(arg_types=self.anal_array(t.arg_types), ret_type=t.ret_type.accept(self), - fallback=self.builtin_type('builtins.function'), + fallback=t.fallback or self.builtin_type('builtins.function'), variables=self.anal_var_defs(t.variables), bound_vars=self.anal_bound_vars(t.bound_vars)) From 601e0aeb337d9185dacc56b31513f2031784ab86 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 22 Mar 2016 17:34:48 -0700 Subject: [PATCH 096/117] Make tests pass. --- mypy/test/data/semanal-classes.test | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mypy/test/data/semanal-classes.test b/mypy/test/data/semanal-classes.test index b05c49ac5876..71af09a7af8f 100644 --- a/mypy/test/data/semanal-classes.test +++ b/mypy/test/data/semanal-classes.test @@ -458,7 +458,7 @@ MypyFile:1( Args( Var(cls) Var(z)) - def (cls: Any, z: builtins.int) -> builtins.str + def (cls: def () -> __main__.A, z: builtins.int) -> builtins.str Class Block:3( PassStmt:3()))))) @@ -478,7 +478,7 @@ MypyFile:1( f Args( Var(cls)) - def (cls: Any) -> builtins.str + def (cls: def () -> __main__.A) -> builtins.str Class Block:3( PassStmt:3()))))) From 8a2dbbd6e449bf87844d4301e78d03ef00de413e Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 22 Mar 2016 20:57:02 -0700 Subject: [PATCH 097/117] Properly type-check C().some_class_method. --- mypy/checkmember.py | 15 ++++++++++++--- mypy/messages.py | 3 +++ 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/mypy/checkmember.py b/mypy/checkmember.py index 393e510f9609..8eaad8acffb8 100644 --- a/mypy/checkmember.py +++ b/mypy/checkmember.py @@ -187,7 +187,7 @@ def analyze_var(name: str, var: Var, itype: Instance, info: TypeInfo, node: Cont # methods: the former to the instance, the latter to the # class. functype = cast(FunctionLike, t) - check_method_type(functype, itype, node, msg) + check_method_type(functype, itype, var.is_classmethod, node, msg) signature = method_type(functype) if var.is_property: # A property cannot have an overloaded type => the cast @@ -228,17 +228,26 @@ def lookup_member_var_or_accessor(info: TypeInfo, name: str, return None -def check_method_type(functype: FunctionLike, itype: Instance, +def check_method_type(functype: FunctionLike, itype: Instance, is_classmethod: bool, context: Context, msg: MessageBuilder) -> None: for item in functype.items(): if not item.arg_types or item.arg_kinds[0] not in (ARG_POS, ARG_STAR): # No positional first (self) argument (*args is okay). msg.invalid_method_type(item, context) - else: + elif not is_classmethod: # Check that self argument has type 'Any' or valid instance type. 
selfarg = item.arg_types[0] if not subtypes.is_equivalent(selfarg, itype): msg.invalid_method_type(item, context) + else: + # Check that cls argument has type 'Any' or valid class type. + clsarg = item.arg_types[0] + if isinstance(clsarg, CallableType) and clsarg.is_type_obj(): + if not subtypes.is_equivalent(clsarg.ret_type, itype): + msg.invalid_class_method_type(item, context) + else: + if not subtypes.is_equivalent(clsarg, AnyType()): + msg.invalid_class_method_type(item, context) def analyze_class_attribute_access(itype: Instance, diff --git a/mypy/messages.py b/mypy/messages.py index 7557fb05bed0..3b539ecf5b57 100644 --- a/mypy/messages.py +++ b/mypy/messages.py @@ -733,6 +733,9 @@ def cannot_determine_type_in_base(self, name: str, base: str, context: Context) def invalid_method_type(self, sig: CallableType, context: Context) -> None: self.fail('Invalid method type', context) + def invalid_class_method_type(self, sig: CallableType, context: Context) -> None: + self.fail('Invalid class method type', context) + def incompatible_conditional_function_def(self, defn: FuncDef) -> None: self.fail('All conditional function variants must have identical ' 'signatures', defn) From 7ff4bc42eaec949998b1abf8576a6aa51f2c24a7 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 23 Mar 2016 07:47:17 -0700 Subject: [PATCH 098/117] Rewrite long line. --- mypy/semanal.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mypy/semanal.py b/mypy/semanal.py index 3f8628f2e033..cbd8649b0cac 100644 --- a/mypy/semanal.py +++ b/mypy/semanal.py @@ -306,7 +306,10 @@ def prepare_method_signature(self, func: FuncDef) -> None: self.fail('Method must have at least one argument', func) elif func.type: sig = cast(FunctionLike, func.type) - leading_type = self.class_subtype(self.type) if func.is_class else self_type(self.type) + if func.is_class: + leading_type = self.class_subtype(self.type) + else: + leading_type = self_type(self.type) func.type = replace_implicit_first_type(sig, leading_type) def is_conditional_func(self, previous: Node, new: FuncDef) -> bool: From f4770fac1f6058cfb89212be7a2c06a80f3dcce4 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 23 Mar 2016 08:18:35 -0700 Subject: [PATCH 099/117] Rename class_subtype to class_type. --- mypy/semanal.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mypy/semanal.py b/mypy/semanal.py index cbd8649b0cac..ffc371fc17ac 100644 --- a/mypy/semanal.py +++ b/mypy/semanal.py @@ -307,7 +307,7 @@ def prepare_method_signature(self, func: FuncDef) -> None: elif func.type: sig = cast(FunctionLike, func.type) if func.is_class: - leading_type = self.class_subtype(self.type) + leading_type = self.class_type(self.type) else: leading_type = self_type(self.type) func.type = replace_implicit_first_type(sig, leading_type) @@ -809,7 +809,7 @@ def analyze_metaclass(self, defn: ClassDef) -> None: def object_type(self) -> Instance: return self.named_type('__builtins__.object') - def class_subtype(self, info: TypeInfo) -> Type: + def class_type(self, info: TypeInfo) -> Type: # Construct a function type whose fallback is cls. from mypy import checkmember # To avoid import cycle. leading_type = checkmember.type_object_type(info, self.builtin_type) From 8812d18cdfce8fa7b9f272784e41e364373207a0 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 23 Mar 2016 08:37:43 -0700 Subject: [PATCH 100/117] Add unit tests. 
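The kind of code these new test cases exercise, shown outside the test framework for readability (the class and method names below are made up for the example):

    class C:
        @classmethod
        def make(cls) -> 'C':
            return cls()       # OK: cls is now typed as a callable returning C

        @classmethod
        def make_bad(cls) -> 'C':
            return cls(1)      # now reported: Too many arguments for "C"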
--- mypy/test/data/check-classes.test | 36 +++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/mypy/test/data/check-classes.test b/mypy/test/data/check-classes.test index d35331211386..2a797de065cc 100644 --- a/mypy/test/data/check-classes.test +++ b/mypy/test/data/check-classes.test @@ -763,6 +763,42 @@ class A: A().f = A.f # E: Cannot assign to a method [builtins fixtures/classmethod.py] +[case testClassMethodCalledInClassMethod] +import typing +class C: + @classmethod + def foo(cls) -> None: pass + @classmethod + def bar(cls) -> None: + cls() + cls(1) # E: Too many arguments for "C" + cls.bar() + cls.bar(1) # E: Too many arguments for "bar" of "C" + cls.bozo() # E: "C" has no attribute "bozo" +[builtins fixtures/classmethod.py] +[out] +main: note: In member "bar" of class "C": + +[case testClassMethodCalledOnClass] +import typing +class C: + @classmethod + def foo(cls) -> None: pass +C.foo() +C.foo(1) # E: Too many arguments for "foo" of "C" +C.bozo() # E: "C" has no attribute "bozo" +[builtins fixtures/classmethod.py] + +[case testClassMethodCalledOnInstance] +import typing +class C: + @classmethod + def foo(cls) -> None: pass +C().foo() +C().foo(1) # E: Too many arguments for "foo" of "C" +C.bozo() # E: "C" has no attribute "bozo" +[builtins fixtures/classmethod.py] + -- Properties -- ---------- From 2b712dfdbd971e9b13e90221d92f9de68bca89e6 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 23 Mar 2016 08:42:55 -0700 Subject: [PATCH 101/117] Add a comment to the cls argument check. --- mypy/checkmember.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mypy/checkmember.py b/mypy/checkmember.py index 8eaad8acffb8..3f0638710e10 100644 --- a/mypy/checkmember.py +++ b/mypy/checkmember.py @@ -241,6 +241,9 @@ def check_method_type(functype: FunctionLike, itype: Instance, is_classmethod: b msg.invalid_method_type(item, context) else: # Check that cls argument has type 'Any' or valid class type. + # (This is sufficient for the current treatment of @classmethod, + # but probably needs to be revisited when we implement Type[C] + # or advanced variants of it like Type[, C].) clsarg = item.arg_types[0] if isinstance(clsarg, CallableType) and clsarg.is_type_obj(): if not subtypes.is_equivalent(clsarg.ret_type, itype): From e5f316bee26aff035f9d9febc25e0023ff530345 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 23 Mar 2016 11:57:34 -0700 Subject: [PATCH 102/117] It should not be an error to call an abstract method from a class method! 
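In other words, roughly this pattern (a factory classmethod on an abstract base calling an abstract method) should type-check, since at runtime cls is typically a concrete subclass. The snippet is a simplified version of the test case added below:

    from abc import abstractmethod

    class C:
        @classmethod
        def create_and_run(cls) -> None:
            cls().bar()        # no longer rejected just because C is abstract

        @abstractmethod
        def bar(self) -> None:
            pass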
--- mypy/checkexpr.py | 2 +- mypy/checkmember.py | 4 +++- mypy/test/data/check-classes.test | 12 ++++++++++++ mypy/types.py | 8 ++++++++ 4 files changed, 24 insertions(+), 2 deletions(-) diff --git a/mypy/checkexpr.py b/mypy/checkexpr.py index ccbdfaf8db3a..d342ec063167 100644 --- a/mypy/checkexpr.py +++ b/mypy/checkexpr.py @@ -218,7 +218,7 @@ def check_call(self, callee: Type, args: List[Node], """ arg_messages = arg_messages or self.msg if isinstance(callee, CallableType): - if callee.is_type_obj() and callee.type_object().is_abstract: + if callee.is_concrete_type_obj() and callee.type_object().is_abstract: type = callee.type_object() self.msg.cannot_instantiate_abstract_class( callee.type_object().name(), type.abstract_attributes, diff --git a/mypy/checkmember.py b/mypy/checkmember.py index 3f0638710e10..a67363c1caca 100644 --- a/mypy/checkmember.py +++ b/mypy/checkmember.py @@ -382,7 +382,9 @@ def class_callable(init_type: CallableType, info: TypeInfo, type_type: Instance) callable_type = init_type.copy_modified( ret_type=self_type(info), fallback=type_type, name=None, variables=variables) c = callable_type.with_name('"{}"'.format(info.name())) - return convert_class_tvars_to_func_tvars(c, len(initvars)) + cc = convert_class_tvars_to_func_tvars(c, len(initvars)) + cc.is_classmethod_class = True + return cc def convert_class_tvars_to_func_tvars(callable: CallableType, diff --git a/mypy/test/data/check-classes.test b/mypy/test/data/check-classes.test index 2a797de065cc..dd6b5d2c06fb 100644 --- a/mypy/test/data/check-classes.test +++ b/mypy/test/data/check-classes.test @@ -799,6 +799,18 @@ C().foo(1) # E: Too many arguments for "foo" of "C" C.bozo() # E: "C" has no attribute "bozo" [builtins fixtures/classmethod.py] +[case testClassMethodMayCallAbstractMethod] +from abc import abstractmethod +import typing +class C: + @classmethod + def foo(cls) -> None: + cls().bar() + @abstractmethod + def bar(self) -> None: + pass +[builtins fixtures/classmethod.py] + -- Properties -- ---------- diff --git a/mypy/types.py b/mypy/types.py index 455dcc242116..cc577a89c626 100644 --- a/mypy/types.py +++ b/mypy/types.py @@ -231,6 +231,9 @@ class FunctionLike(Type): @abstractmethod def is_type_obj(self) -> bool: pass + def is_concrete_type_obj(self) -> bool: + return self.is_type_obj() + @abstractmethod def type_object(self) -> mypy.nodes.TypeInfo: pass @@ -278,6 +281,8 @@ class CallableType(FunctionLike): # Is this Callable[..., t] (with literal '...')? is_ellipsis_args = False + # Is this callable constructed for the benefit of a classmethod's 'cls' argument? + is_classmethod_class = False # Was this type implicitly generated instead of explicitly specified by the user? implicit = False @@ -343,6 +348,9 @@ def copy_modified(self, def is_type_obj(self) -> bool: return self.fallback.type.fullname() == 'builtins.type' + def is_concrete_type_obj(self) -> bool: + return self.is_type_obj() and self.is_classmethod_class + def type_object(self) -> mypy.nodes.TypeInfo: assert self.is_type_obj() ret = self.ret_type From 28aabb670696d36577fcb910ca9e9e25cc946442 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 23 Mar 2016 12:32:40 -0700 Subject: [PATCH 103/117] Propagate implicit and is_classmethod_class in CallableType.copy_modified(). 
--- mypy/types.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/mypy/types.py b/mypy/types.py index cc577a89c626..792a360209b4 100644 --- a/mypy/types.py +++ b/mypy/types.py @@ -297,7 +297,9 @@ def __init__(self, arg_types: List[Type], bound_vars: List[Tuple[int, Type]] = None, line: int = -1, is_ellipsis_args: bool = False, - implicit=False) -> None: + implicit=False, + is_classmethod_class=False, + ) -> None: if variables is None: variables = [] if not bound_vars: @@ -343,6 +345,8 @@ def copy_modified(self, line=line if line is not _dummy else self.line, is_ellipsis_args=( is_ellipsis_args if is_ellipsis_args is not _dummy else self.is_ellipsis_args), + implicit=self.implicit, + is_classmethod_class=self.is_classmethod_class, ) def is_type_obj(self) -> bool: From 8ff538feeea17cb74a6d075efbe86c5b45fed6d7 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 23 Mar 2016 16:04:23 -0700 Subject: [PATCH 104/117] [De]serialize is_classmethod_class. --- mypy/types.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mypy/types.py b/mypy/types.py index 309feac4e576..66038968d459 100644 --- a/mypy/types.py +++ b/mypy/types.py @@ -545,6 +545,7 @@ def serialize(self) -> JsonDict: 'bound_vars': [[x, y.serialize()] for x, y in self.bound_vars], 'is_ellipsis_args': self.is_ellipsis_args, 'implicit': self.implicit, + 'is_classmethod_class': self.is_classmethod_class, } @classmethod @@ -562,6 +563,7 @@ def deserialize(cls, data: JsonDict) -> 'CallableType': bound_vars=[(x, Type.deserialize(y)) for x, y in data['bound_vars']], is_ellipsis_args=data['is_ellipsis_args'], implicit=data['implicit'], + is_classmethod_class=data['is_classmethod_class'], ) From 30966e00769b2f52e3e3c82a77f57f8319257907 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 23 Mar 2016 16:15:40 -0700 Subject: [PATCH 105/117] Clear trailing whitespace. --- mypy/build.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mypy/build.py b/mypy/build.py index d409145dc961..6c6869210a77 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -64,7 +64,7 @@ # Disallow defining untyped (or incompletely typed) functions DISALLOW_UNTYPED_DEFS = 'disallow-untyped-defs' # Type check unannotated functions -CHECK_UNTYPED_DEFS = 'check-untyped-defs' +CHECK_UNTYPED_DEFS = 'check-untyped-defs' PYTHON_EXTENSIONS = ['.pyi', '.py'] From cd5ec0d678b04fe194e30a89001ad50fba05b8aa Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 5 Apr 2016 17:24:02 -0700 Subject: [PATCH 106/117] Remove all print statements and pdb calls from fixup.py. Add asserts for error conditions instead. 
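The shape of the change, reduced to a stand-alone sketch (lookup() and its arguments are made-up names; the real functions are in fixup.py below): conditions that used to print a diagnostic or drop into pdb become hard assertions with a descriptive message.

    def lookup(names, key, owner):
        # Old style: print('*** Cannot find', key, 'for', owner) and carry on.
        # New style: treat the condition as an internal invariant violation.
        assert key in names, "Cannot find %s for %s" % (key, owner)
        return names[key]

    print(lookup({'x': 1}, 'x', 'demo.mod'))   # 1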
--- mypy/fixup.py | 43 +++++++++---------------------------------- 1 file changed, 9 insertions(+), 34 deletions(-) diff --git a/mypy/fixup.py b/mypy/fixup.py index 0a13223493e6..213c957283f3 100644 --- a/mypy/fixup.py +++ b/mypy/fixup.py @@ -14,26 +14,18 @@ def fixup_module_pass_one(tree: MypyFile, modules: Dict[str, MypyFile]) -> None: node_fixer = NodeFixer(modules) node_fixer.visit_symbol_table(tree.names) - # print('Done pass 1', tree.fullname()) def fixup_module_pass_two(tree: MypyFile, modules: Dict[str, MypyFile]) -> None: compute_all_mros(tree.names, modules) - # print('Done pass 2', tree.fullname()) def compute_all_mros(symtab: SymbolTable, modules: Dict[str, MypyFile]) -> None: for key, value in symtab.items(): if value.kind in (LDEF, MDEF, GDEF) and isinstance(value.node, TypeInfo): info = value.node - # print(' Calc MRO for', info.fullname()) - try: - info.calculate_mro() - except Exception: - import pdb - pdb.set_trace() - if not info.mro: - print('*** No MRO calculated for', info.fullname()) + info.calculate_mro() + assert info.mro, "No MRO calculated for %s" % (info.fullname(),) compute_all_mros(info.names, modules) @@ -52,12 +44,10 @@ def visit_type_info(self, info: TypeInfo) -> None: save_info = self.current_info try: self.current_info = info - # print('Descending into', info.fullname()) if info.defn: info.defn.accept(self) if info.names: self.visit_symbol_table(info.names) - # print('Fixing up', info.fullname()) if info.subtypes: for st in info.subtypes: self.visit_type_info(st) @@ -82,11 +72,9 @@ def visit_symbol_table(self, symtab: SymbolTable) -> None: value.node = self.modules[cross_ref] else: stnode = lookup_qualified_stnode(self.modules, cross_ref) - if stnode is None: - print("*** Could not find cross-reference", cross_ref) - else: - value.node = stnode.node - value.type_override = stnode.type_override + assert stnode is not None, "Could not find cross-ref %s" % (cross_ref,) + value.node = stnode.node + value.type_override = stnode.type_override else: if isinstance(value.node, TypeInfo): # TypeInfo has no accept(). TODO: Add it? @@ -246,13 +234,9 @@ def lookup_qualified_stnode(modules: Dict[str, MypyFile], name: str) -> SymbolTa break names = mod.names while True: - if not rest: - print('*** Cannot find', name) - return None + assert rest, "Cannot find %s" % (name,) key = rest.pop() - if key not in names: - print('*** Cannot find', key, 'for', name) - return None + assert key in names, "Cannot find %s for %s" % (key, name) stnode = names[key] if not rest: return stnode @@ -262,7 +246,6 @@ def lookup_qualified_stnode(modules: Dict[str, MypyFile], name: str) -> SymbolTa def store_qualified(modules: Dict[str, MypyFile], name: str, info: SymbolNode) -> None: - print("store_qualified", name, repr(info)) head = name rest = [] while True: @@ -273,25 +256,17 @@ def store_qualified(modules: Dict[str, MypyFile], name: str, info: SymbolNode) - break names = mod.names while True: - if not rest: - print('*** Cannot find', name) - import pdb # type: ignore - pdb.set_trace() - return + assert rest, "Cannot find %s" % (name,) key = rest.pop() if key not in names: - if rest: - print('*** Cannot find', key, 'for', name) - return + assert not rest, "Cannot find %s for %s" % (key, name) # Store it. # TODO: kind might be something else? 
names[key] = SymbolTableNode(GDEF, info) - print('Stored', names[key]) return stnode = names[key] node = stnode.node if not rest: - print('*** Overwriting!', name, stnode) stnode.node = info return assert isinstance(node, TypeInfo) From 9d2c327933f40658b986c47a6645b54768edccab Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 5 Apr 2016 21:50:22 -0700 Subject: [PATCH 107/117] Respond to code review for nodes.py. Serialize some additional fields, clarify a few places, add some comments, replace a print() with an assert. --- mypy/nodes.py | 71 ++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 53 insertions(+), 18 deletions(-) diff --git a/mypy/nodes.py b/mypy/nodes.py index 1d9816132f36..0e9e44b18fef 100644 --- a/mypy/nodes.py +++ b/mypy/nodes.py @@ -113,7 +113,8 @@ def get_line(self) -> int: def accept(self, visitor: NodeVisitor[T]) -> T: raise RuntimeError('Not implemented') - # @abstractmethod # TODO + # NOTE: Can't use @abstractmethod, since many subclasses of Node + # don't implement serialize(). def serialize(self) -> Any: raise NotImplementedError('Cannot serialize {} instance'.format(self.__class__.__name__)) @@ -202,6 +203,7 @@ def serialize(self) -> JsonDict: '_fullname': self._fullname, 'names': self.names.serialize(self._fullname), 'is_stub': self.is_stub, + 'path': self.path, } @classmethod @@ -212,6 +214,7 @@ def deserialize(cls, data: JsonDict) -> 'MypyFile': tree._fullname = data['_fullname'] tree.names = SymbolTable.deserialize(data['names']) tree.is_stub = data['is_stub'] + tree.path = data['path'] return tree @@ -314,6 +317,8 @@ def serialize(self) -> JsonDict: return {'.class': 'OverloadedFuncDef', 'items': [i.serialize() for i in self.items], 'type': None if self.type is None else self.type.serialize(), + 'fullname': self._fullname, + 'is_property': self.is_property, } @classmethod @@ -322,6 +327,9 @@ def deserialize(cls, data: JsonDict) -> 'OverloadedFuncDef': res = OverloadedFuncDef([Decorator.deserialize(d) for d in data['items']]) if data.get('type') is not None: res.type = mypy.types.Type.deserialize(data['type']) + res._fullname = data['fullname'] + res.is_property = data['is_property'] + # NOTE: res.info will be set in the fixup phase. return res @@ -462,8 +470,15 @@ def serialize(self) -> JsonDict: 'fullname': self._fullname, 'arguments': [a.serialize() for a in self.arguments], 'type': None if self.type is None else self.type.serialize(), + 'is_property': self.is_property, + 'is_overload': self.is_overload, + 'is_generator': self.is_generator, + 'is_static': self.is_static, 'is_class': self.is_class, - # TODO: Various other flags + 'is_decorated': self.is_decorated, + 'is_conditional': self.is_conditional, + 'is_abstract': self.is_abstract, + # TODO: Do we need expanded, original_def? } @classmethod @@ -476,7 +491,15 @@ def deserialize(cls, data: JsonDict) -> 'FuncDef': (None if data['type'] is None else mypy.types.FunctionLike.deserialize(data['type']))) ret._fullname = data['fullname'] + ret.is_property = data['is_property'] + ret.is_overload = data['is_overload'] + ret.is_generator = data['is_generator'] + ret.is_static = data['is_static'] ret.is_class = data['is_class'] + ret.is_decorated = data['is_decorated'] + ret.is_conditional = data['is_conditional'] + ret.is_abstract = data['is_abstract'] + # NOTE: ret.info is set in the fixup phase. return ret @@ -562,12 +585,12 @@ def accept(self, visitor: NodeVisitor[T]) -> T: def serialize(self) -> JsonDict: # TODO: Leave default values out? 
+ # NOTE: Sometimes self.is_ready is False here, but we don't care. data = {'.class': 'Var', 'name': self._name, 'fullname': self._fullname, 'type': None if self.type is None else self.type.serialize(), 'is_self': self.is_self, - 'is_ready': self.is_ready, # TODO: is this needed? 'is_initialized_in_class': self.is_initialized_in_class, 'is_staticmethod': self.is_staticmethod, 'is_classmethod': self.is_classmethod, @@ -630,7 +653,7 @@ def is_generic(self) -> bool: return self.info.is_generic() def serialize(self) -> JsonDict: - # Not serialized: defs, base_type_exprs + # Not serialized: defs, base_type_exprs,d ecorators return {'.class': 'ClassDef', 'name': self.name, 'fullname': self.fullname, @@ -1066,6 +1089,8 @@ def accept(self, visitor: NodeVisitor[T]) -> T: return visitor.visit_name_expr(self) def serialize(self) -> JsonDict: + # TODO: Find out where and why NameExpr is being serialized (if at all). + assert False, "Serializing NameExpr: %s" % (self,) return {'.class': 'NameExpr', 'kind': self.kind, 'node': None if self.node is None else self.node.serialize(), @@ -1814,7 +1839,7 @@ def calculate_mro(self) -> None: Raise MroError if cannot determine mro. """ mro = linearize_hierarchy(self) - if mro is None: return # TODO: Or raise MroError()? + assert mro, "Could not produce a MRO at all for %s" % (self,) self.mro = mro def has_base(self, fullname: str) -> bool: @@ -1862,10 +1887,10 @@ def __str__(self) -> str: 'TypeInfo') def serialize(self) -> Union[str, JsonDict]: + # NOTE: This is where all ClassDefs originate, so there shouldn't be duplicates. data = {'.class': 'TypeInfo', 'fullname': self.fullname(), 'alt_fullname': self.alt_fullname, - 'subtypes': [t.serialize() for t in self.subtypes], 'names': self.names.serialize(self.alt_fullname or self.fullname()), 'defn': self.defn.serialize(), 'is_abstract': self.is_abstract, @@ -1887,7 +1912,7 @@ def deserialize(cls, data: JsonDict) -> 'TypeInfo': ti = TypeInfo(names, defn) ti._fullname = data['fullname'] ti.alt_fullname = data['alt_fullname'] - ti.subtypes = {TypeInfo.deserialize(t) for t in data['subtypes']} + # TODO: Is there a reason to reconstruct ti.subtypes? ti.is_abstract = data['is_abstract'] ti.abstract_attributes = data['abstract_attributes'] ti.is_enum = data['is_enum'] @@ -1970,6 +1995,12 @@ def __str__(self) -> str: return s def serialize(self, prefix: str, name: str) -> JsonDict: + """Serialize a SymbolTableNode. + + Args: + prefix: full name of the containing module or class; or None + name: name of this object relative to the containing object + """ data = {'.class': 'SymbolTableNode', 'kind': node_kinds[self.kind], } # type: JsonDict @@ -1978,13 +2009,15 @@ def serialize(self, prefix: str, name: str) -> JsonDict: if not self.module_public: data['module_public'] = False if self.kind == MODULE_REF: - if self.node is None: - print("*** Missing module cross ref in %s for %s" % (prefix, name)) - else: - data['cross_ref'] = self.node.fullname() + assert self.node is not None, "Missing module cross ref in %s for %s" % (prefix, name) + data['cross_ref'] = self.node.fullname() else: if self.node is not None: if prefix is not None: + # Check whether this is an alias for another object. + # If the object's canonical full name differs from + # the full name computed from prefix and name, + # it's an alias, and we serialize it as a cross ref. 
if isinstance(self.node, TypeInfo): fullname = self.node.alt_fullname or self.node.fullname() else: @@ -1995,7 +2028,7 @@ def serialize(self, prefix: str, name: str) -> JsonDict: return data data['node'] = self.node.serialize() if self.type_override is not None: - data['type'] = self.type.serialize() + data['type_override'] = self.type_override.serialize() return data @classmethod @@ -2003,7 +2036,7 @@ def deserialize(cls, data: JsonDict) -> 'SymbolTableNode': assert data['.class'] == 'SymbolTableNode' kind = inverse_node_kinds[data['kind']] if 'cross_ref' in data: - # This needs to be fixed up in a later pass. + # This will be fixed up later. stnode = SymbolTableNode(kind, None) stnode.cross_ref = data['cross_ref'] else: @@ -2011,8 +2044,8 @@ def deserialize(cls, data: JsonDict) -> 'SymbolTableNode': if 'node' in data: node = SymbolNode.deserialize(data['node']) typ = None - if 'type' in data: - typ = mypy.types.Type.deserialize(data['type']) + if 'type_override' in data: + typ = mypy.types.Type.deserialize(data['type_override']) stnode = SymbolTableNode(kind, node, typ=typ) if 'tvar_id' in data: stnode.tvar_id = data['tvar_id'] @@ -2041,6 +2074,10 @@ def __str__(self) -> str: def serialize(self, fullname: str) -> JsonDict: data = {'.class': 'SymbolTable'} # type: JsonDict for key, value in self.items(): + # Skip __builtins__: it's a reference to the builtins + # module that gets added to every module by + # SemanticAnalyzer.visit_file(), but it shouldn't be + # accessed by users of the module. if key == '__builtins__': continue data[key] = value.serialize(fullname, key) @@ -2121,9 +2158,7 @@ def linearize_hierarchy(info: TypeInfo) -> Optional[List[TypeInfo]]: bases = info.direct_base_classes() lin_bases = [] for base in bases: - if base is None: - print('*** Cannot linearize bases for', info.fullname(), bases) - return None + assert base is not None, "Cannot linearize bases for %s %s" % (info.fullname(), bases) more_bases = linearize_hierarchy(base) if more_bases is None: return None From 3a2cc0bd850e2546d89a4742cccfda4625f1f23b Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 6 Apr 2016 08:22:50 -0700 Subject: [PATCH 108/117] Respond to code review for types.py. Made a few things non-optional, removed some duplicate defs, added a TODO comment. --- mypy/types.py | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/mypy/types.py b/mypy/types.py index 7ca1d1fb774a..ad0b23c20471 100644 --- a/mypy/types.py +++ b/mypy/types.py @@ -292,12 +292,10 @@ def accept(self, visitor: 'TypeVisitor[T]') -> T: def serialize(self) -> JsonDict: data = {'.class': 'Instance', } # type: JsonDict - if self.type is not None: - data['type_ref'] = self.type.alt_fullname or self.type.fullname() + assert self.type is not None + data['type_ref'] = self.type.alt_fullname or self.type.fullname() if self.args: data['args'] = [arg.serialize() for arg in self.args] - if self.erased: - data['erased'] = True return data @classmethod @@ -308,10 +306,8 @@ def deserialize(cls, data: JsonDict) -> 'Instance': args_list = data['args'] assert isinstance(args_list, list) args = [Type.deserialize(arg) for arg in args_list] - inst = Instance(None, args, erased=data.get('erased', False)) - if 'type_ref' in data: - inst.type_ref = data['type_ref'] - # Will be fixed up by fixup.py later. + inst = Instance(None, args) + inst.type_ref = data['type_ref'] # Will be fixed up by fixup.py later. 
return inst @@ -398,7 +394,6 @@ class CallableType(FunctionLike): min_args = 0 # Minimum number of arguments; derived from arg_kinds is_var_arg = False # Is it a varargs function? derived from arg_kinds ret_type = None # type: Type # Return value type - fallback = None # type: Instance name = '' # Name (may be None; for error messages) definition = None # type: SymbolNode # For error messages. May be None. # Type variables for a generic function @@ -496,9 +491,6 @@ def is_type_obj(self) -> bool: def is_concrete_type_obj(self) -> bool: return self.is_type_obj() and self.is_classmethod_class - def is_concrete_type_obj(self) -> bool: - return self.is_type_obj() and self.is_classmethod_class - def type_object(self) -> mypy.nodes.TypeInfo: assert self.is_type_obj() ret = self.ret_type @@ -535,6 +527,8 @@ def type_var_ids(self) -> List[int]: return a def serialize(self) -> JsonDict: + # TODO: As an optimization, leave out everything related to + # generic functions for non-generic functions. return {'.class': 'CallableType', 'arg_types': [(None if t is None else t.serialize()) for t in self.arg_types], From ba1553d9e60a45a477a6ddaeda22bc63073dc80b Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 6 Apr 2016 12:59:28 -0700 Subject: [PATCH 109/117] Respond to code review for build.py. - Add/update many docstrings and comments. - Remove dead code at end of write_cache(). - Turn cache inconsistency into assert False. - Use keyword args for State() constructor calls. --- mypy/build.py | 179 ++++++++++++++++++++++++++++++++------------------ 1 file changed, 116 insertions(+), 63 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index 316a9a6c8821..7edd894da9ef 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -629,6 +629,17 @@ def read_with_python_encoding(path: str, pyversion: Tuple[int, int]) -> str: def get_cache_names(id: str, path: str, pyversion: Tuple[int, int]) -> Tuple[str, str]: + """Return the file names for the cache files. + + Args: + id: module ID + path: module path (used to recognize packages) + pyversion: Python version (major, minor) + + Returns: + A tuple with the file names to be used for the meta JSON and the + data JSON, respectively. + """ prefix = os.path.join(MYPY_CACHE, '%d.%d' % pyversion, *id.split('.')) is_package = os.path.basename(path).startswith('__init__.py') if is_package: @@ -637,6 +648,19 @@ def get_cache_names(id: str, path: str, pyversion: Tuple[int, int]) -> Tuple[str def find_cache_meta(id: str, path: str, manager: BuildManager) -> Optional[CacheMeta]: + """Find cache data for a module. + + Args: + id: module ID + path: module path + manager: the build manager (for pyversion, log/trace, and build options) + + Returns: + A CacheMeta instance if the cache data was found and appears + valid; otherwise None. + """ + # TODO: May need to take more build options into account; in + # particular SILENT_IMPORTS may affect the cache dramatically. meta_json, data_json = get_cache_names(id, path, manager.pyversion) manager.trace('Looking for {} {}'.format(id, data_json)) if not os.path.exists(meta_json): @@ -681,6 +705,15 @@ def random_string(): def write_cache(id: str, path: str, tree: MypyFile, dependencies: List[str], manager: BuildManager) -> None: + """Write cache files for a module. 
+ + Args: + id: module ID + path: module path + tree: the fully checked module data + dependencies: module IDs on which this module depends + manager: the build manager (for pyversion, log/trace) + """ path = os.path.abspath(path) manager.trace('Dumping {} {}'.format(id, path)) st = os.stat(path) # TODO: Errors @@ -710,60 +743,11 @@ def write_cache(id: str, path: str, tree: MypyFile, dependencies: List[str], with open(meta_json_tmp, 'w') as f: json.dump(meta, f, sort_keys=True) f.write('\n') + # TODO: On Windows, os.rename() may not be atomic, and we could + # use os.replace(). However that's new in Python 3.3. os.rename(data_json_tmp, data_json) os.rename(meta_json_tmp, meta_json) - return - - # Now, as a test, read it back. - print() - print('Reading what we wrote for', id, 'from', data_json) - with open(data_json, 'r') as f: - new_data = json.load(f) - assert new_data == data - new_tree = MypyFile.deserialize(new_data) - new_names = new_tree.names - new_keys = sorted(new_names) - - print('Fixing up', id) - fixup.fixup_module_pass_one(new_tree, manager.modules) - - print('Comparing keys', id) - old_tree = tree - old_names = old_tree.names - old_keys = sorted(old_names) - if new_keys != old_keys: - for key in new_keys: - if key not in old_keys: - print(' New key', key, 'not found in old tree') - for key in old_keys: - if key not in new_keys: - v = old_names[key] - if key != '__builtins__' and v.module_public: - print(' Old key', key, 'not found in new tree') - - print('Comparing values', id) - modules = manager.modules - for key in old_keys: - if key not in new_keys: - continue - oldv = old_names[key] - newv = new_names[key] - if newv.mod_id != oldv.mod_id: - newv.mod_id = id # XXX Hack - if newv.kind == MODULE_REF and newv.node is None: - fn = oldv.node.fullname() - if fn in modules: - newv.node = modules[fn] - else: - print('*** Cannot fix up reference to module', fn, 'for', key) - if str(oldv) != str(newv): - print(' ', key, 'old', oldv) - print(' ', ' ' * len(key), 'new', newv) - import pdb # type: ignore - pdb.set_trace() - print() - """Dependency manager. @@ -892,8 +876,8 @@ def write_cache(id: str, path: str, tree: MypyFile, dependencies: List[str], cached data; but because the node is part of a cycle we can't technically type-check it until the semantic analysis of all other nodes in the cycle has completed. (This is an important issue because -we have a cycle of over 500 modules in the server repo. But I'd like -to deal with it later.) +Dropbox has a very large cycle in production code. But I'd like to +deal with it later.) Additional wrinkles ------------------- @@ -928,10 +912,20 @@ class State: data = None # type: Optional[str] tree = None # type: Optional[MypyFile] dependencies = None # type: List[str] - dep_line_map = None # tyoe: Dict[str, int] # Line number where imported + + # Map each dependency to the line number where it is first imported + dep_line_map = None # type: Dict[str, int] + + # Parent package, its parent, etc. ancestors = None # type: Optional[List[str]] + + # List of (path, line number) tuples giving context for import import_context = None # type: List[Tuple[str, int]] + + # The State from which this module was imported, if any caller_state = None # type: Optional[State] + + # If caller_state is set, the line number in the caller where the import occurred caller_line = 0 def __init__(self, @@ -1121,10 +1115,12 @@ def parse_file(self) -> None: # Double-check that the dependencies still match (otherwise # the graph is out of date). 
if self.dependencies is not None and dependencies != self.dependencies: - # TODO: Make this into a reasonable error message. - print("HELP!! Dependencies changed!") # Probably the file was edited. + # Presumably the file was edited while we were running. + # TODO: Make this into a reasonable error message, or recover somehow. + print("HELP!! Dependencies changed!") print(" Cached:", self.dependencies) print(" Source:", dependencies) + assert False, "Cache inconsistency for dependencies of %s" % (self.id,) self.dependencies = dependencies self.dep_line_map = dep_line_map self.check_blockers() @@ -1184,12 +1180,14 @@ def dispatch(sources: List[BuildSource], manager: BuildManager) -> None: def load_graph(sources: List[BuildSource], manager: BuildManager) -> Graph: """Given some source files, load the full dependency graph.""" graph = {} # type: Graph - # The deque is used to implement breadth first traversal. + # The deque is used to implement breadth-first traversal. + # TODO: Consider whether to go depth-first instead. This may + # affect the order in which we process files within import cycles. new = collections.deque() # type: collections.deque[State] # Seed the graph with the initial root sources. for bs in sources: try: - st = State(bs.module, bs.path, bs.text, manager) + st = State(id=bs.module, path=bs.path, source=bs.text, manager=manager) except ModuleNotFound: continue if st.id in graph: @@ -1207,9 +1205,11 @@ def load_graph(sources: List[BuildSource], manager: BuildManager) -> Graph: if dep in st.ancestors: # TODO: Why not 'if dep not in st.dependencies' ? # Ancestors don't have import context. - newst = State(dep, None, None, manager, is_ancestor=True) + newst = State(id=dep, path=None, source=None, manager=manager, + is_ancestor=True) else: - newst = State(dep, None, None, manager, st, st.dep_line_map.get(dep, 1)) + newst = State(id=dep, path=None, source=None, manager=manager, + caller_state=st, caller_line=st.dep_line_map.get(dep, 1)) except ModuleNotFound: if dep in st.dependencies: st.dependencies.remove(dep) @@ -1225,15 +1225,25 @@ def process_graph(graph: Graph, manager: BuildManager) -> None: sccs = sorted_components(graph) manager.log("Found %d SCCs; largest has %d nodes" % (len(sccs), max(len(scc) for scc in sccs))) + # We're processing SCCs from leaves (those without further + # dependencies) to roots (those from which everything else can be + # reached). for ascc in sccs: # Sort the SCC's nodes in *reverse* order or encounter. # This is a heuristic for handling import cycles. # Note that ascc is a set, and scc is a list. scc = sorted(ascc, key=lambda id: -graph[id].order) - # If builtins is in the list, move it last. + # If builtins is in the list, move it last. (This is a bit of + # a hack, but it's necessary because the builtins module is + # part of a small cycle involving at least {builtins, abc, + # typing}. Of these, builtins must be processed last or else + # some builtin objects will be incompletely processed.) if 'builtins' in ascc: scc.remove('builtins') scc.append('builtins') + # Because the SCCs are presented in topological sort order, we + # don't need to look at dependencies recursively for staleness + # -- the immediate dependencies are sufficient. 
stale_scc = {id for id in scc if not graph[id].is_fresh()} fresh = not stale_scc deps = set() @@ -1261,7 +1271,8 @@ def process_graph(graph: Graph, manager: BuildManager) -> None: else: key = "dep:" manager.trace(" %5s %.0f %s" % (key, graph[id].meta.data_mtime, id)) - # If equal, give the benefit of the doubt, due to 1-sec time granularity. + # If equal, give the benefit of the doubt, due to 1-sec time granularity + # (on some platforms). if oldest_in_scc < newest_in_deps: fresh = False fresh_msg = "out of date by %.0f seconds" % (newest_in_deps - oldest_in_scc) @@ -1318,6 +1329,9 @@ def process_stale_scc(graph: Graph, scc: List[str]) -> None: def sorted_components(graph: Graph) -> List[AbstractSet[str]]: """Return the graph's SCCs, topologically sorted by dependencies. + The sort order is from leaves (nodes without dependencies) to + roots (nodes on which no other nodes depend). + This works for a subset of the full dependency graph too; dependencies that aren't present in graph.keys() are ignored. """ @@ -1336,7 +1350,7 @@ def sorted_components(graph: Graph) -> List[AbstractSet[str]]: data[frozenset(scc)] = deps res = [] for ready in topsort(data): - # Sort the sets in ready by reversed smallest State.order. Exampes: + # Sort the sets in ready by reversed smallest State.order. Examples: # # - If ready is [{x}, {y}], x.order == 1, y.order == 2, we get # [{y}, {x}]. @@ -1353,6 +1367,18 @@ def strongly_connected_components_path(vertices: Set[str], edges: Dict[str, List[str]]) -> Iterator[Set[str]]: """Compute Strongly Connected Components of a graph. + The graph is a DAG. + + Args: + vertices: the labels for the vertices + edges: for each vertex, gives the target vertices of its outgoing edges + + Returns: + An iterator yielding strongly connected components, each + represented as a set of vertices. Each input vertex will occur + exactly once; vertices not part of a SCC are returned as + singleton sets. + From http://code.activestate.com/recipes/578507/. """ identified = set() # type: Set[str] @@ -1392,6 +1418,33 @@ def topsort(data: Dict[AbstractSet[str], Set[AbstractSet[str]]]) -> Iterable[Set[AbstractSet[str]]]: """Topological sort. Consumes its argument. + Args: + data: A map from SCCs (represented as frozen sets of strings) to + sets of SCCs, its dependencies. NOTE: This data structure + is modified in place -- for normalization purposes, + self-dependencies are removed and entries representing + orphans are added. + + Returns: + An iterator yielding sets of SCCs that have an equivalent + ordering. NOTE: The algorithm doesn't care about the internal + structure of SCCs. + + Example: + Suppose the input has the following structure: + + {A: {B, C}, B: {D}, C: {D}} + + This is normalized to: + + {A: {B, C}, B: {D}, C: {D}, D: {}} + + The algorithm will yield the following values: + + {D} + {B, C} + {A} + From http://code.activestate.com/recipes/577413/. """ # TODO: Use a faster algorithm? From 84f0d86dd974927dd4207c70e266efe27cb49095 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 6 Apr 2016 15:15:19 -0700 Subject: [PATCH 110/117] Add a few unit tests for the graph processing in build.py. 
--- mypy/build.py | 12 +++++------ mypy/test/testgraph.py | 47 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 7 deletions(-) create mode 100644 mypy/test/testgraph.py diff --git a/mypy/build.py b/mypy/build.py index 7edd894da9ef..ed59f40af000 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -1339,7 +1339,7 @@ def sorted_components(graph: Graph) -> List[AbstractSet[str]]: vertices = set(graph) edges = {id: [dep for dep in st.dependencies if dep in graph] for id, st in graph.items()} - sccs = list(strongly_connected_components_path(vertices, edges)) + sccs = list(strongly_connected_components(vertices, edges)) # Topsort. sccsmap = {id: frozenset(scc) for scc in sccs for id in scc} data = {} # type: Dict[AbstractSet[str], Set[AbstractSet[str]]] @@ -1363,11 +1363,9 @@ def sorted_components(graph: Graph) -> List[AbstractSet[str]]: return res -def strongly_connected_components_path(vertices: Set[str], - edges: Dict[str, List[str]]) -> Iterator[Set[str]]: - """Compute Strongly Connected Components of a graph. - - The graph is a DAG. +def strongly_connected_components(vertices: Set[str], + edges: Dict[str, List[str]]) -> Iterator[Set[str]]: + """Compute Strongly Connected Components of a directed graph. Args: vertices: the labels for the vertices @@ -1416,7 +1414,7 @@ def dfs(v: str) -> Iterator[Set[str]]: def topsort(data: Dict[AbstractSet[str], Set[AbstractSet[str]]]) -> Iterable[Set[AbstractSet[str]]]: - """Topological sort. Consumes its argument. + """Topological sort. Args: data: A map from SCCs (represented as frozen sets of strings) to diff --git a/mypy/test/testgraph.py b/mypy/test/testgraph.py new file mode 100644 index 000000000000..34c479746c17 --- /dev/null +++ b/mypy/test/testgraph.py @@ -0,0 +1,47 @@ +"""Test cases for graph processing code in build.py.""" + +from mypy.myunit import Suite, assert_equal +from mypy.build import BuildManager, State, TYPE_CHECK +from mypy.build import topsort, strongly_connected_components, sorted_components + + +class GraphSuite(Suite): + + def test_topsort(self): + a = frozenset({'A'}) + b = frozenset({'B'}) + c = frozenset({'C'}) + d = frozenset({'D'}) + data = {a: {b, c}, b: {d}, c: {d}} + res = list(topsort(data)) + assert_equal(res, [{d}, {b, c}, {a}]) + + def test_scc(self): + vertices = {'A', 'B', 'C', 'D'} + edges = {'A': ['B', 'C'], + 'B': ['C'], + 'C': ['B', 'D'], + 'D': []} + sccs = set(map(frozenset, strongly_connected_components(vertices, edges))) + assert_equal(sccs, + {frozenset({'A'}), + frozenset({'B', 'C'}), + frozenset({'D'})}) + + def test_sorted_components(self): + manager = BuildManager( + data_dir='', + lib_path=[], + target=TYPE_CHECK, + pyversion=(3, 5), + flags=[], + ignore_prefix='', + custom_typing_module='', + source_set=None, + reports=None) + graph = {'a': State('a', None, 'import b, c', manager), + 'b': State('b', None, 'import c', manager), + 'c': State('c', None, 'import b, d', manager), + 'd': State('d', None, 'pass', manager)} + res = sorted_components(graph) + assert_equal(res, [frozenset({'d'}), frozenset({'c', 'b'}), frozenset({'a'})]) From d6800fe30f93ebcf3745313a32fd933954805c0d Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Thu, 7 Apr 2016 08:21:05 -0700 Subject: [PATCH 111/117] Add manager and error messages to BuildResult; don't raise for non-blockers. Also move normalize_error_messages() to helpers, where it belongs. This is in preparation of introducing tests for incremental mode. CompileError is still raised for blockers. 
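A sketch of how a caller consumes the API after this change; the exact keyword arguments and the sample source text are illustrative only (see the main.py and test changes below for the real call sites):

    from mypy import build
    from mypy.build import BuildSource

    res = build.build(sources=[BuildSource('main', None, 'x = 1 + ""\n')],
                      target=build.TYPE_CHECK)
    # Non-blocking errors are now returned on the result instead of raised;
    # CompileError is still raised for blockers.
    for msg in res.errors:
        print(msg)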
--- mypy/build.py | 29 ++++++++++++++----------- mypy/main.py | 31 ++++++++++++++++----------- mypy/test/helpers.py | 9 ++++++++ mypy/test/testcheck.py | 18 +++++++++------- mypy/test/testsemanal.py | 44 +++++++++++++++++++------------------- mypy/test/testtransform.py | 4 +++- mypy/test/testtypegen.py | 2 +- 7 files changed, 79 insertions(+), 58 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index ed59f40af000..b87244670501 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -73,14 +73,17 @@ class BuildResult: """The result of a successful build. Attributes: - files: Dictionary from module name to related AST node. - types: Dictionary from parse tree node to its inferred type. + manager: The build manager. + files: Dictionary from module name to related AST node. + types: Dictionary from parse tree node to its inferred type. + errors: List of error messages. """ - def __init__(self, files: Dict[str, MypyFile], - types: Dict[Node, Type]) -> None: - self.files = files - self.types = types + def __init__(self, manager: 'BuildManager') -> None: + self.manager = manager + self.files = manager.modules + self.types = manager.type_checker.type_map + self.errors = manager.errors.messages() class BuildSource: @@ -137,7 +140,8 @@ def build(sources: List[BuildSource], A single call to build performs parsing, semantic analysis and optionally type checking for the program *and* all imported modules, recursively. - Return BuildResult if successful; otherwise raise CompileError. + Return BuildResult if successful or only non-blocking errors were found; + otherwise raise CompileError. Args: target: select passes to perform (a build target constant, e.g. C) @@ -201,10 +205,12 @@ def build(sources: List[BuildSource], try: dispatch(sources, manager) - return BuildResult(manager.modules, manager.type_checker.type_map) + return BuildResult(manager) finally: - manager.log("Build finished with %d modules and %d types" % - (len(manager.modules), len(manager.type_checker.type_map))) + manager.log("Build finished with %d modules, %d types, and %d errors" % + (len(manager.modules), + len(manager.type_checker.type_map), + manager.errors.num_messages())) # Finish the HTML or XML reports even if CompileError was raised. 
reports.finish() @@ -1172,9 +1178,6 @@ def dispatch(sources: List[BuildSource], manager: BuildManager) -> None: graph = load_graph(sources, manager) manager.log("Loaded graph with %d nodes" % len(graph)) process_graph(graph, manager) - if manager.errors.is_errors(): - manager.log("Found %d errors (before de-duping)" % manager.errors.num_messages()) - manager.errors.raise_error() def load_graph(sources: List[BuildSource], manager: BuildManager) -> Graph: diff --git a/mypy/main.py b/mypy/main.py index 7806e1a940d5..33735a9061f3 100644 --- a/mypy/main.py +++ b/mypy/main.py @@ -10,7 +10,7 @@ from mypy import build from mypy import defaults from mypy import git -from mypy.build import BuildSource, PYTHON_EXTENSIONS +from mypy.build import BuildSource, BuildResult, PYTHON_EXTENSIONS from mypy.errors import CompileError, set_drop_into_pdb from mypy.version import __version__ @@ -46,14 +46,19 @@ def main(script_path: str) -> None: set_drop_into_pdb(True) if not options.dirty_stubs: git.verify_git_integrity_or_abort(build.default_data_dir(bin_dir)) + f = sys.stdout try: if options.target == build.TYPE_CHECK: - type_check_only(sources, bin_dir, options) + res = type_check_only(sources, bin_dir, options) + a = res.errors else: raise RuntimeError('unsupported target %d' % options.target) except CompileError as e: - f = sys.stdout if e.use_stdout else sys.stderr - for m in e.messages: + a = e.messages + if not e.use_stdout: + f = sys.stderr + if a: + for m in a: f.write(m + '\n') sys.exit(1) @@ -83,16 +88,16 @@ def readlinkabs(link: str) -> str: def type_check_only(sources: List[BuildSource], - bin_dir: str, options: Options) -> None: + bin_dir: str, options: Options) -> BuildResult: # Type-check the program and dependencies and translate to Python. - build.build(sources=sources, - target=build.TYPE_CHECK, - bin_dir=bin_dir, - pyversion=options.pyversion, - custom_typing_module=options.custom_typing_module, - report_dirs=options.report_dirs, - flags=options.build_flags, - python_path=options.python_path) + return build.build(sources=sources, + target=build.TYPE_CHECK, + bin_dir=bin_dir, + pyversion=options.pyversion, + custom_typing_module=options.custom_typing_module, + report_dirs=options.report_dirs, + flags=options.build_flags, + python_path=options.python_path) FOOTER = """environment variables: diff --git a/mypy/test/helpers.py b/mypy/test/helpers.py index 38a49e1e0a20..023f0bd249bd 100644 --- a/mypy/test/helpers.py +++ b/mypy/test/helpers.py @@ -274,3 +274,12 @@ def testcase_pyversion(path: str, testcase_name: str) -> Tuple[int, int]: return defaults.PYTHON2_VERSION else: return testfile_pyversion(path) + + +def normalize_error_messages(messages): + """Translate an array of error messages to use / as path separator.""" + + a = [] + for m in messages: + a.append(m.replace(os.sep, '/')) + return a diff --git a/mypy/test/testcheck.py b/mypy/test/testcheck.py index 664822fd66aa..5ac2a00bfdd7 100644 --- a/mypy/test/testcheck.py +++ b/mypy/test/testcheck.py @@ -13,9 +13,9 @@ from mypy.test.config import test_temp_dir, test_data_prefix from mypy.test.data import parse_test_cases from mypy.test.helpers import ( - assert_string_arrays_equal, testcase_pyversion, update_testcase_output + assert_string_arrays_equal, normalize_error_messages, + testcase_pyversion, update_testcase_output, ) -from mypy.test.testsemanal import normalize_error_messages from mypy.errors import CompileError @@ -73,13 +73,15 @@ def run_test(self, testcase): flags = self.parse_flags(program_text) source = BuildSource(program_name, 
module_name, program_text) try: - build.build(target=build.TYPE_CHECK, - sources=[source], - pyversion=pyversion, - flags=flags + [build.TEST_BUILTINS], - alt_lib_path=test_temp_dir) + res = build.build(target=build.TYPE_CHECK, + sources=[source], + pyversion=pyversion, + flags=flags + [build.TEST_BUILTINS], + alt_lib_path=test_temp_dir) + a = res.errors except CompileError as e: - a = normalize_error_messages(e.messages) + a = e.messages + a = normalize_error_messages(a) if testcase.output != a and mypy.myunit.UPDATE_TESTCASES: update_testcase_output(testcase, a, mypy.myunit.APPEND_TESTCASES) diff --git a/mypy/test/testsemanal.py b/mypy/test/testsemanal.py index 68da884655b0..a1d6a5835ac6 100644 --- a/mypy/test/testsemanal.py +++ b/mypy/test/testsemanal.py @@ -7,7 +7,9 @@ from mypy import build from mypy.build import BuildSource from mypy.myunit import Suite -from mypy.test.helpers import assert_string_arrays_equal, testfile_pyversion +from mypy.test.helpers import ( + assert_string_arrays_equal, normalize_error_messages, testfile_pyversion, +) from mypy.test.data import parse_test_cases from mypy.test.config import test_data_prefix, test_temp_dir from mypy.errors import CompileError @@ -52,7 +54,9 @@ def test_semanal(testcase): pyversion=testfile_pyversion(testcase.file), flags=[build.TEST_BUILTINS], alt_lib_path=test_temp_dir) - a = [] + a = result.errors + if a: + raise CompileError(a) # Include string representations of the source files in the actual # output. for fnam in sorted(result.files.keys()): @@ -96,28 +100,19 @@ def test_semanal_error(testcase): try: src = '\n'.join(testcase.input) - build.build(target=build.SEMANTIC_ANALYSIS, - sources=[BuildSource('main', None, src)], - flags=[build.TEST_BUILTINS], - alt_lib_path=test_temp_dir) - raise AssertionError('No errors reported in {}, line {}'.format( - testcase.file, testcase.line)) + res = build.build(target=build.SEMANTIC_ANALYSIS, + sources=[BuildSource('main', None, src)], + flags=[build.TEST_BUILTINS], + alt_lib_path=test_temp_dir) + a = res.errors + assert a, 'No errors reported in {}, line {}'.format(testcase.file, testcase.line) except CompileError as e: # Verify that there was a compile error and that the error messages # are equivalent. - assert_string_arrays_equal( - testcase.output, normalize_error_messages(e.messages), - 'Invalid compiler output ({}, line {})'.format(testcase.file, - testcase.line)) - - -def normalize_error_messages(messages): - """Translate an array of error messages to use / as path separator.""" - - a = [] - for m in messages: - a.append(m.replace(os.sep, '/')) - return a + a = e.messages + assert_string_arrays_equal( + testcase.output, normalize_error_messages(a), + 'Invalid compiler output ({}, line {})'.format(testcase.file, testcase.line)) # SymbolNode table export test cases @@ -144,7 +139,9 @@ def run_test(self, testcase): flags=[build.TEST_BUILTINS], alt_lib_path=test_temp_dir) # The output is the symbol table converted into a string. - a = [] + a = result.errors + if a: + raise CompileError(a) for f in sorted(result.files.keys()): if f not in ('builtins', 'typing', 'abc'): a.append('{}:'.format(f)) @@ -181,6 +178,9 @@ def run_test(self, testcase): sources=[BuildSource('main', None, src)], flags=[build.TEST_BUILTINS], alt_lib_path=test_temp_dir) + a = result.errors + if a: + raise CompileError(a) # Collect all TypeInfos in top-level modules. 
typeinfos = TypeInfoMap() diff --git a/mypy/test/testtransform.py b/mypy/test/testtransform.py index 51789b943395..1d9916ea2f5e 100644 --- a/mypy/test/testtransform.py +++ b/mypy/test/testtransform.py @@ -44,7 +44,9 @@ def test_transform(testcase): pyversion=testfile_pyversion(testcase.file), flags=[build.TEST_BUILTINS], alt_lib_path=test_temp_dir) - a = [] + a = result.errors + if a: + raise CompileError(a) # Include string representations of the source files in the actual # output. for fnam in sorted(result.files.keys()): diff --git a/mypy/test/testtypegen.py b/mypy/test/testtypegen.py index 29a89cedc307..0e4432bcb71b 100644 --- a/mypy/test/testtypegen.py +++ b/mypy/test/testtypegen.py @@ -29,7 +29,6 @@ def cases(self): return c def run_test(self, testcase): - a = [] try: line = testcase.input[0] mask = '' @@ -41,6 +40,7 @@ def run_test(self, testcase): sources=[BuildSource('main', None, src)], flags=[build.TEST_BUILTINS], alt_lib_path=config.test_temp_dir) + a = result.errors map = result.types nodes = map.keys() From a9cdd9bb8f306cc3fdb6006b4eeb613a68eed216 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Thu, 7 Apr 2016 10:03:04 -0700 Subject: [PATCH 112/117] Fix docstrings for AssertionFailure and SkipTestCaseException. --- mypy/myunit/__init__.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mypy/myunit/__init__.py b/mypy/myunit/__init__.py index 714c048bbc22..de3c2a581448 100644 --- a/mypy/myunit/__init__.py +++ b/mypy/myunit/__init__.py @@ -19,7 +19,7 @@ class AssertionFailure(Exception): - """Exception used to signal skipped test cases.""" + """Exception used to signal failed test cases.""" def __init__(self, s: str = None) -> None: if s: super().__init__(s) @@ -27,7 +27,9 @@ def __init__(self, s: str = None) -> None: super().__init__() -class SkipTestCaseException(Exception): pass +class SkipTestCaseException(Exception): + """Exception used to signal skipped test cases.""" + pass def assert_true(b: bool, msg: str = None) -> None: From 5594a1b8d0952613baa523e41766042f414a3813 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Thu, 7 Apr 2016 10:13:18 -0700 Subject: [PATCH 113/117] Added support for checking incremental mode. --- mypy/test/data/check-incremental.test | 31 +++++++++ mypy/test/testcheck.py | 93 +++++++++++++++++++++++++-- 2 files changed, 119 insertions(+), 5 deletions(-) create mode 100644 mypy/test/data/check-incremental.test diff --git a/mypy/test/data/check-incremental.test b/mypy/test/data/check-incremental.test new file mode 100644 index 000000000000..c27bcd71e4a8 --- /dev/null +++ b/mypy/test/data/check-incremental.test @@ -0,0 +1,31 @@ +-- Checks for incremental mode (see testcheck.py). +-- Each test is run twice, once with a cold cache, once with a warm cache. +-- The first time it must pass. +-- Before it is run the second time, any *.py.next files are copied to *.py. +-- The second time it must produce the errors given in the [out] section, if any. 
+ +[case testIncrementalEmpty] +[out] + +[case testIncrementalBasics] +import m +[file m.py] +def foo(): + pass +[file m.py.next] +def foo() -> None: + pass +[out] + +[case testIncrementalError] +import m +[file m.py] +def foo() -> None: + pass +[file m.py.next] +def foo() -> None: + bar() +[out] +main:1: note: In module imported here: +tmp/m.py: note: In function "foo": +tmp/m.py:2: error: Name 'bar' is not defined diff --git a/mypy/test/testcheck.py b/mypy/test/testcheck.py index 5ac2a00bfdd7..acabed232145 100644 --- a/mypy/test/testcheck.py +++ b/mypy/test/testcheck.py @@ -2,14 +2,15 @@ import os.path import re +import shutil import sys -from typing import Tuple, List +from typing import Tuple, List, Dict, Set from mypy import build import mypy.myunit # for mutable globals (ick!) from mypy.build import BuildSource -from mypy.myunit import Suite +from mypy.myunit import Suite, AssertionFailure from mypy.test.config import test_temp_dir, test_data_prefix from mypy.test.data import parse_test_cases from mypy.test.helpers import ( assert_string_arrays_equal, normalize_error_messages, testcase_pyversion, update_testcase_output, ) @@ -54,6 +55,7 @@ 'check-type-promotion.test', 'check-semanal-error.test', 'check-flags.test', + 'check-incremental.test', ] @@ -66,11 +68,43 @@ def cases(self): return c def run_test(self, testcase): - a = [] + incremental = 'incremental' in testcase.name.lower() or 'incremental' in testcase.file + if incremental: + # Incremental tests are run once with a cold cache, once with a warm cache. + # Expect success on first run, errors from testcase.output (if any) on second run. + self.clear_cache() + self.run_test_once(testcase, 1) + self.run_test_once(testcase, 2) + else: + self.run_test_once(testcase) + + def clear_cache(self): + dn = build.MYPY_CACHE + if os.path.exists(dn): + shutil.rmtree(dn) + + def run_test_once(self, testcase, incremental=0): pyversion = testcase_pyversion(testcase.file, testcase.name) program_text = '\n'.join(testcase.input) module_name, program_name, program_text = self.parse_options(program_text) flags = self.parse_flags(program_text) + output = testcase.output + if incremental: + flags.append(build.INCREMENTAL) + if incremental == 1: + # In run 1, copy program text to program file. + output = [] + with open(program_name, 'w') as f: + f.write(program_text) + program_text = None + elif incremental == 2: + # In run 2, copy *.py.next files to *.py files. + for dn, dirs, files in os.walk(os.curdir): + for file in files: + if file.endswith('.py.next'): + full = os.path.join(dn, file) + target = full[:-5] + shutil.copy(full, target) source = BuildSource(program_name, module_name, program_text) try: res = build.build(target=build.TYPE_CHECK, @@ -80,17 +114,66 @@ def run_test(self, testcase): alt_lib_path=test_temp_dir) a = res.errors except CompileError as e: + res = None a = e.messages a = normalize_error_messages(a) - if testcase.output != a and mypy.myunit.UPDATE_TESTCASES: + if output != a and mypy.myunit.UPDATE_TESTCASES: update_testcase_output(testcase, a, mypy.myunit.APPEND_TESTCASES) assert_string_arrays_equal( - testcase.output, a, + output, a, 'Invalid type checker output ({}, line {})'.format( testcase.file, testcase.line)) + if incremental and res: + self.verify_cache(module_name, program_name, a, res.manager) + + def verify_cache(self, module_name: str, program_name: str, a: List[str], + manager: build.BuildManager): + # There should be valid cache metadata for each module except + # those in error_paths; for those there should not be.
+ # + # NOTE: When A imports B and there's an error in B, the cache + # data for B is invalidated, but the cache data for A remains. + # However build.process_graph() will ignore A's cache data. + error_paths = self.find_error_paths(a) + modules = self.find_module_files() + modules.update({module_name: program_name}) + missing_paths = self.find_missing_cache_files(modules, manager) + if missing_paths != error_paths: + raise AssertionFailure("cache data discrepancy %s != %s" % + (missing_paths, error_paths)) + + def find_error_paths(self, a: List[str]) -> Set[str]: + hits = set() + for line in a: + m = re.match(r'([^\s:]+):\d+: error:', line) + if m: + hits.add(m.group(1)) + return hits + + def find_module_files(self): + modules = {} + for dn, dirs, files in os.walk(test_temp_dir): + dnparts = dn.split(os.sep) + assert dnparts[0] == test_temp_dir + del dnparts[0] + for file in files: + if file.endswith('.py'): + base, ext = os.path.splitext(file) + id = '.'.join(dnparts + [base]) + modules[id] = os.path.join(dn, file) + return modules + + def find_missing_cache_files(self, modules, manager): + missing = {} + for id, path in modules.items(): + meta = build.find_cache_meta(id, path, manager) + if meta is None: + missing[id] = path + return set(missing.values()) + def parse_options(self, program_text: str) -> Tuple[str, str, str]: """Return type check options for a test case. From 700d9590ebdb2e08fef7a10a1fbecfa6d943db68 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Thu, 7 Apr 2016 12:43:07 -0700 Subject: [PATCH 114/117] Fully annotate testcheck.py --- mypy/test/testcheck.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/mypy/test/testcheck.py b/mypy/test/testcheck.py index acabed232145..8f8c1c60b5cf 100644 --- a/mypy/test/testcheck.py +++ b/mypy/test/testcheck.py @@ -12,7 +12,7 @@ from mypy.build import BuildSource from mypy.myunit import Suite, AssertionFailure from mypy.test.config import test_temp_dir, test_data_prefix -from mypy.test.data import parse_test_cases +from mypy.test.data import parse_test_cases, DataDrivenTestCase from mypy.test.helpers import ( assert_string_arrays_equal, normalize_error_messages, testcase_pyversion, update_testcase_output, ) @@ -60,14 +60,15 @@ class TypeCheckSuite(Suite): - def cases(self): - c = [] + + def cases(self) -> List[DataDrivenTestCase]: + c = [] # type: List[DataDrivenTestCase] for f in files: c += parse_test_cases(os.path.join(test_data_prefix, f), self.run_test, test_temp_dir, True) return c - def run_test(self, testcase): + def run_test(self, testcase: DataDrivenTestCase) -> None: incremental = 'incremental' in testcase.name.lower() or 'incremental' in testcase.file if incremental: # Incremental tests are run once with a cold cache, once with a warm cache.
@@ -78,12 +79,12 @@ def run_test(self, testcase): else: self.run_test_once(testcase) - def clear_cache(self): + def clear_cache(self) -> None: dn = build.MYPY_CACHE if os.path.exists(dn): shutil.rmtree(dn) - def run_test_once(self, testcase, incremental=0): + def run_test_once(self, testcase: DataDrivenTestCase, incremental=0) -> None: pyversion = testcase_pyversion(testcase.file, testcase.name) program_text = '\n'.join(testcase.input) module_name, program_name, program_text = self.parse_options(program_text) @@ -130,7 +131,7 @@ def run_test_once(self, testcase, incremental=0): self.verify_cache(module_name, program_name, a, res.manager) def verify_cache(self, module_name: str, program_name: str, a: List[str], - manager: build.BuildManager): + manager: build.BuildManager) -> None: # There should be valid cache metadata for each module except # those in error_paths; for those there should not be. # @@ -153,7 +154,7 @@ def find_error_paths(self, a: List[str]) -> Set[str]: hits.add(m.group(1)) return hits - def find_module_files(self): + def find_module_files(self) -> Dict[str, str]: modules = {} for dn, dirs, files in os.walk(test_temp_dir): dnparts = dn.split(os.sep) @@ -166,7 +167,8 @@ def find_module_files(self): modules[id] = os.path.join(dn, file) return modules - def find_missing_cache_files(self, modules, manager): + def find_missing_cache_files(self, modules: Dict[str, str], + manager: build.BuildManager) -> Set[str]: missing = {} for id, path in modules.items(): meta = build.find_cache_meta(id, path, manager) From 842a5e1c0d573fcdbba74ddc0714c79ec7af6b3d Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Thu, 7 Apr 2016 12:57:27 -0700 Subject: [PATCH 115/117] Fully annotate testgraph.py. --- mypy/test/testgraph.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/mypy/test/testgraph.py b/mypy/test/testgraph.py index 34c479746c17..6f9897660acf 100644 --- a/mypy/test/testgraph.py +++ b/mypy/test/testgraph.py @@ -1,5 +1,7 @@ """Test cases for graph processing code in build.py.""" +from typing import AbstractSet, Dict, List, Set + from mypy.myunit import Suite, assert_equal from mypy.build import BuildManager, State, TYPE_CHECK from mypy.build import topsort, strongly_connected_components, sorted_components @@ -7,28 +9,28 @@ class GraphSuite(Suite): - def test_topsort(self): + def test_topsort(self) -> None: a = frozenset({'A'}) b = frozenset({'B'}) c = frozenset({'C'}) d = frozenset({'D'}) - data = {a: {b, c}, b: {d}, c: {d}} + data = {a: {b, c}, b: {d}, c: {d}} # type: Dict[AbstractSet[str], Set[AbstractSet[str]]] res = list(topsort(data)) assert_equal(res, [{d}, {b, c}, {a}]) - def test_scc(self): + def test_scc(self) -> None: vertices = {'A', 'B', 'C', 'D'} edges = {'A': ['B', 'C'], 'B': ['C'], 'C': ['B', 'D'], - 'D': []} - sccs = set(map(frozenset, strongly_connected_components(vertices, edges))) + 'D': []} # type: Dict[str, List[str]] + sccs = set(frozenset(x) for x in strongly_connected_components(vertices, edges)) assert_equal(sccs, {frozenset({'A'}), frozenset({'B', 'C'}), frozenset({'D'})}) - def test_sorted_components(self): + def test_sorted_components(self) -> None: manager = BuildManager( data_dir='', lib_path=[], From cb00db6bbaa619192eb7414187eb1a482f2871b1 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Thu, 7 Apr 2016 13:59:02 -0700 Subject: [PATCH 116/117] Add missing annotations to helpers.py.
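For context, the helper being annotated here just rewrites OS-specific path separators so that test output is stable across platforms; an illustrative (hypothetical) check of that behaviour, not part of the diff below:

    import os

    from mypy.test.helpers import normalize_error_messages

    # os.path.join uses os.sep, which the helper rewrites to '/'.
    msg = os.path.join('tmp', 'm.py') + ': note: In function "foo":'
    assert normalize_error_messages([msg]) == ['tmp/m.py: note: In function "foo":']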
--- mypy/test/helpers.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/mypy/test/helpers.py b/mypy/test/helpers.py index 023f0bd249bd..6877fb46d02b 100644 --- a/mypy/test/helpers.py +++ b/mypy/test/helpers.py @@ -7,6 +7,7 @@ from mypy import defaults from mypy.myunit import AssertionFailure from mypy.test import config +from mypy.test.data import DataDrivenTestCase # AssertStringArraysEqual displays special line alignment helper messages if @@ -85,7 +86,7 @@ def assert_string_arrays_equal(expected: List[str], actual: List[str], raise AssertionFailure(msg) -def update_testcase_output(testcase, output, append): +def update_testcase_output(testcase: DataDrivenTestCase, output: List[str], append: str) -> None: testcase_path = os.path.join(testcase.old_cwd, testcase.file) newfile = testcase_path + append data_lines = open(testcase_path).read().splitlines() @@ -182,7 +183,7 @@ def assert_string_arrays_equal_wildcards(expected: List[str], assert_string_arrays_equal(expected, actual, msg) -def clean_up(a): +def clean_up(a: List[str]) -> List[str]: """Remove common directory prefix from all strings in a. This uses a naive string replace; it seems to work well enough. Also @@ -276,7 +277,7 @@ def testcase_pyversion(path: str, testcase_name: str) -> Tuple[int, int]: return testfile_pyversion(path) -def normalize_error_messages(messages): +def normalize_error_messages(messages: List[str]) -> List[str]: """Translate an array of error messages to use / as path separator.""" a = [] From b513603a84b5a84c3c7dd667c904c2c116c9428a Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 8 Apr 2016 10:27:25 -0700 Subject: [PATCH 117/117] Fix typo in comment. --- mypy/nodes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mypy/nodes.py b/mypy/nodes.py index 0e9e44b18fef..83739cd8976a 100644 --- a/mypy/nodes.py +++ b/mypy/nodes.py @@ -653,7 +653,7 @@ def is_generic(self) -> bool: return self.info.is_generic() def serialize(self) -> JsonDict: - # Not serialized: defs, base_type_exprs,d ecorators + # Not serialized: defs, base_type_exprs, decorators return {'.class': 'ClassDef', 'name': self.name, 'fullname': self.fullname,