diff --git a/.gitignore b/.gitignore index 800d9c80a8bb..68eedb55212a 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,7 @@ docs/build/ *.iml /out/ .venv/ +.mypy_cache/ # Packages *.egg diff --git a/mypy/build.py b/mypy/build.py index 34043e19e2ff..b87244670501 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -8,29 +8,35 @@ The function build() is the main interface to this module. """ +# TODO: More consistent terminology, e.g. path/fnam, module/id, state/file +import binascii +import collections +import contextlib +import json import os import os.path -import shlex -import subprocess import sys -import re +import time from os.path import dirname, basename -from typing import Dict, List, Tuple, Iterable, cast, Set, Union, Optional +from typing import (AbstractSet, Dict, Iterable, Iterator, List, + NamedTuple, Optional, Set, Tuple, Union) from mypy.types import Type -from mypy.nodes import MypyFile, Node, Import, ImportFrom, ImportAll -from mypy.nodes import SymbolTableNode, MODULE_REF -from mypy.semanal import SemanticAnalyzer, FirstPass, ThirdPass +from mypy.nodes import (MypyFile, Node, Import, ImportFrom, ImportAll, + SymbolTableNode, MODULE_REF) +from mypy.semanal import FirstPass, SemanticAnalyzer, ThirdPass from mypy.checker import TypeChecker from mypy.errors import Errors, CompileError -from mypy import parse -from mypy import stats +from mypy import fixup from mypy.report import Reports from mypy import defaults from mypy import moduleinfo from mypy import util +from mypy.fixup import fixup_module_pass_one, fixup_module_pass_two +from mypy.parse import parse +from mypy.stats import dump_type_stats # We need to know the location of this file to load data, but @@ -51,6 +57,7 @@ DUMP_TYPE_STATS = 'dump-type-stats' DUMP_INFER_STATS = 'dump-infer-stats' SILENT_IMPORTS = 'silent-imports' # Silence imports of .py files +INCREMENTAL = 'incremental' # Incremental mode: use the cache FAST_PARSER = 'fast-parser' # Use experimental fast parser # Disallow calling untyped functions from typed ones DISALLOW_UNTYPED_CALLS = 'disallow-untyped-calls' @@ -59,43 +66,24 @@ # Type check unannotated functions CHECK_UNTYPED_DEFS = 'check-untyped-defs' -# State ids. These describe the states a source file / module can be in a -# build. - -# We aren't processing this source file yet (no associated state object). -UNSEEN_STATE = 0 -# The source file has a state object, but we haven't done anything with it yet. -UNPROCESSED_STATE = 1 -# We've parsed the source file. -PARSED_STATE = 2 -# We've done the first two passes of semantic analysis. -PARTIAL_SEMANTIC_ANALYSIS_STATE = 3 -# We've semantically analyzed the source file. -SEMANTICALLY_ANALYSED_STATE = 4 -# We've type checked the source file (and all its dependencies). -TYPE_CHECKED_STATE = 5 - PYTHON_EXTENSIONS = ['.pyi', '.py'] -final_state = TYPE_CHECKED_STATE - - -def earlier_state(s: int, t: int) -> bool: - return s < t - class BuildResult: """The result of a successful build. Attributes: - files: Dictionary from module name to related AST node. - types: Dictionary from parse tree node to its inferred type. + manager: The build manager. + files: Dictionary from module name to related AST node. + types: Dictionary from parse tree node to its inferred type. + errors: List of error messages. 
""" - def __init__(self, files: Dict[str, MypyFile], - types: Dict[Node, Type]) -> None: - self.files = files - self.types = types + def __init__(self, manager: 'BuildManager') -> None: + self.manager = manager + self.files = manager.modules + self.types = manager.type_checker.type_map + self.errors = manager.errors.messages() class BuildSource: @@ -105,15 +93,6 @@ def __init__(self, path: Optional[str], module: Optional[str], self.module = module or '__main__' self.text = text - def load(self, lib_path, pyversion: Tuple[int, int]) -> str: - """Load the module if needed. This also has the side effect - of calculating the effective path for modules.""" - if self.text is not None: - return self.text - - self.path = self.path or lookup_program(self.module, lib_path) - return read_program(self.path, pyversion) - @property def effective_path(self) -> str: """Return the effective path (ie, if its from in memory)""" @@ -161,7 +140,8 @@ def build(sources: List[BuildSource], A single call to build performs parsing, semantic analysis and optionally type checking for the program *and* all imported modules, recursively. - Return BuildResult if successful; otherwise raise CompileError. + Return BuildResult if successful or only non-blocking errors were found; + otherwise raise CompileError. Args: target: select passes to perform (a build target constant, e.g. C) @@ -213,8 +193,7 @@ def build(sources: List[BuildSource], source_set = BuildSourceSet(sources) - # Construct a build manager object that performs all the stages of the - # build in the correct order. + # Construct a build manager object to hold state during the build. # # Ignore current directory prefix in error messages. manager = BuildManager(data_dir, lib_path, target, @@ -224,21 +203,16 @@ def build(sources: List[BuildSource], source_set=source_set, reports=reports) - # Construct information that describes the initial files. __main__ is the - # implicit module id and the import context is empty initially ([]). - initial_states = [] # type: List[UnprocessedFile] - for source in sources: - content = source.load(lib_path, pyversion) - info = StateInfo(source.effective_path, source.module, [], manager) - initial_state = UnprocessedFile(info, content) - initial_states += [initial_state] - - # Perform the build by sending the files as new file (UnprocessedFile is the - # initial state of all files) to the manager. The manager will process the - # file and all dependant modules recursively. - result = manager.process(initial_states) - reports.finish() - return result + try: + dispatch(sources, manager) + return BuildResult(manager) + finally: + manager.log("Build finished with %d modules, %d types, and %d errors" % + (len(manager.modules), + len(manager.type_checker.type_map), + manager.errors.num_messages())) + # Finish the HTML or XML reports even if CompileError was raised. 
+ reports.finish() def default_data_dir(bin_dir: str) -> str: @@ -335,39 +309,29 @@ def default_lib_path(data_dir: str, pyversion: Tuple[int, int], return path -def lookup_program(module: str, lib_path: List[str]) -> str: - # Modules are .py or .pyi - path = find_module(module, lib_path) - if path: - return path - else: - raise CompileError([ - "mypy: can't find module '{}'".format(module)]) - - -def read_program(path: str, pyversion: Tuple[int, int]) -> str: - try: - text = read_with_python_encoding(path, pyversion) - except IOError as ioerr: - raise CompileError([ - "mypy: can't read file '{}': {}".format(path, ioerr.strerror)]) - except UnicodeDecodeError as decodeerr: - raise CompileError([ - "mypy: can't decode file '{}': {}".format(path, str(decodeerr))]) - return text +CacheMeta = NamedTuple('CacheMeta', + [('id', str), + ('path', str), + ('mtime', float), + ('size', int), + ('dependencies', List[str]), + ('data_mtime', float), # mtime of data_json + ('data_json', str), # path of .data.json + ]) class BuildManager: - """This is the central class for building a mypy program. + """This class holds shared state for building a mypy program. - It coordinates parsing, import processing, semantic analysis and - type checking. It manages state objects that actually perform the - build steps. + It is used to coordinate parsing, import processing, semantic + analysis and type checking. The actual build steps are carried + out by dispatch(). Attributes: data_dir: Mypy data directory (contains stubs) target: Build target; selects which passes to perform lib_path: Library path for looking up modules + modules: Mapping of module ID to MypyFile (shared by the passes) semantic_analyzer: Semantic analyzer, pass 2 semantic_analyzer_pass3: @@ -376,16 +340,6 @@ class BuildManager: errors: Used for reporting all errors pyversion: Python version (major, minor) flags: Build options - states: States of all individual files that are being - processed. Each file in a build is always represented - by a single state object (after it has been encountered - for the first time). This is the only place where - states are stored. - module_files: Map from module name to source file path. There is a - 1:1 mapping between modules and source files. - module_deps: Cache for module dependencies (direct or indirect). - Item (m, n) indicates whether m depends on n (directly - or indirectly). 
missing_modules: Set of modules that could not be imported encountered so far """ @@ -398,6 +352,7 @@ def __init__(self, data_dir: str, custom_typing_module: str, source_set: BuildSourceSet, reports: Reports) -> None: + self.start_time = time.time() self.data_dir = data_dir self.errors = Errors() self.errors.set_ignore_prefix(ignore_prefix) @@ -410,157 +365,16 @@ def __init__(self, data_dir: str, self.reports = reports self.semantic_analyzer = SemanticAnalyzer(lib_path, self.errors, pyversion=pyversion) - modules = self.semantic_analyzer.modules - self.semantic_analyzer_pass3 = ThirdPass(modules, self.errors) + self.modules = self.semantic_analyzer.modules + self.semantic_analyzer_pass3 = ThirdPass(self.modules, self.errors) self.type_checker = TypeChecker(self.errors, - modules, + self.modules, self.pyversion, DISALLOW_UNTYPED_CALLS in self.flags, DISALLOW_UNTYPED_DEFS in self.flags, CHECK_UNTYPED_DEFS in self.flags) - self.states = [] # type: List[State] - self.module_files = {} # type: Dict[str, str] - self.module_deps = {} # type: Dict[Tuple[str, str], bool] self.missing_modules = set() # type: Set[str] - def process(self, initial_states: List['UnprocessedFile']) -> BuildResult: - """Perform a build. - - The argument is a state that represents the main program - file. This method should only be called once per a build - manager object. The return values are identical to the return - values of the build function. - """ - self.states += initial_states - for initial_state in initial_states: - self.module_files[initial_state.id] = initial_state.path - for initial_state in initial_states: - initial_state.load_dependencies() - - # Process states in a loop until all files (states) have been - # semantically analyzed or type checked (depending on target). - # - # We type check all files before the rest of the passes so that we can - # report errors and fail as quickly as possible. - while True: - # Find the next state that has all its dependencies met. - next = self.next_available_state() - if not next: - self.trace('done') - break - - # Potentially output some debug information. - self.trace('next {} ({})'.format(next.path, next.state())) - - # Set the import context for reporting error messages correctly. - self.errors.set_import_context(next.import_context) - # Process the state. The process method is responsible for adding a - # new state object representing the new state of the file. - next.process() - - # Raise exception if the build failed. The build can fail for - # various reasons, such as parse error, semantic analysis error, - # etc. - if self.errors.is_blockers(): - self.errors.raise_error() - - # If there were no errors, all files should have been fully processed. - for s in self.states: - assert s.state() == final_state, ( - '{} still unprocessed in state {}'.format(s.path, s.state())) - - if self.errors.is_errors(): - self.errors.raise_error() - - # Collect a list of all files. - trees = [] # type: List[MypyFile] - for state in self.states: - trees.append(cast(ParsedFile, state).tree) - - # Perform any additional passes after type checking for all the files. 
- self.final_passes(trees, self.type_checker.type_map) - - return BuildResult(self.semantic_analyzer.modules, - self.type_checker.type_map) - - def next_available_state(self) -> 'State': - """Find a ready state (one that has all its dependencies met).""" - i = len(self.states) - 1 - while i >= 0: - if self.states[i].is_ready(): - num_incomplete = self.states[i].num_incomplete_deps() - if num_incomplete == 0: - # This is perfect; no need to look for the best match. - return self.states[i] - i -= 1 - return None - - def has_module(self, name: str) -> bool: - """Have we seen a module yet?""" - return name in self.module_files - - def file_state(self, path: str) -> int: - """Return the state of a source file. - - In particular, return UNSEEN_STATE if the file has no associated - state. - - This function does not consider any dependencies. - """ - for s in self.states: - if s.path == path: - return s.state() - return UNSEEN_STATE - - def module_state(self, name: str) -> int: - """Return the state of a module. - - In particular, return UNSEEN_STATE if the file has no associated - state. - - This considers also module dependencies. - """ - if not self.has_module(name): - return UNSEEN_STATE - state = final_state - fs = self.file_state(self.module_files[name]) - if earlier_state(fs, state): - state = fs - return state - - def is_dep(self, m1: str, m2: str, done: Set[str] = None) -> bool: - """Does m1 import m2 directly or indirectly?""" - # Have we computed this previously? - dep = self.module_deps.get((m1, m2)) - if dep is not None: - return dep - - if not done: - done = set([m1]) - - # m1 depends on m2 iff one of the deps of m1 depends on m2. - st = self.lookup_state(m1) - for m in st.dependencies: - if m in done: - continue - done.add(m) - # Cache this dependency. - self.module_deps[m1, m] = True - # Search recursively. - if m == m2 or self.is_dep(m, m2, done): - # Yes! Mark it in the cache. - self.module_deps[m1, m2] = True - return True - # No dependency. Mark it in the cache. - self.module_deps[m1, m2] = False - return False - - def lookup_state(self, module: str) -> 'State': - for state in self.states: - if state.id == module: - return state - raise RuntimeError('%s not found' % module) - def all_imported_modules_in_file(self, file: MypyFile) -> List[Tuple[str, int]]: """Find all reachable import statements in a file. @@ -590,12 +404,22 @@ def correct_rel_imp(imp: Union[ImportFrom, ImportAll]) -> str: res.append((id, imp.line)) elif isinstance(imp, ImportFrom): cur_id = correct_rel_imp(imp) - res.append((cur_id, imp.line)) + pos = len(res) + all_are_submodules = True # Also add any imported names that are submodules. for name, __ in imp.names: sub_id = cur_id + '.' + name if self.is_module(sub_id): res.append((sub_id, imp.line)) + else: + all_are_submodules = False + # If all imported names are submodules, don't add + # cur_id as a dependency. Otherwise (i.e., if at + # least one imported name isn't a submodule) + # cur_id is also a dependency, and we should + # insert it *before* any submodules. 
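+                # For example, assuming stubs for both are on the search
+                # path: 'from os import path' adds only 'os.path', while
+                # 'from os import path, getcwd' adds 'os' and then
+                # 'os.path', since 'os.getcwd' is not itself a module.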
+ if not all_are_submodules: + res.insert(pos, ((cur_id, imp.line))) elif isinstance(imp, ImportAll): res.append((correct_rel_imp(imp), imp.line)) return res @@ -604,25 +428,51 @@ def is_module(self, id: str) -> bool: """Is there a file in the file system corresponding to module id?""" return find_module(id, self.lib_path) is not None - def final_passes(self, files: List[MypyFile], - types: Dict[Node, Type]) -> None: - """Perform the code generation passes for type checked files.""" - if self.target in [SEMANTIC_ANALYSIS, TYPE_CHECK]: - pass # Nothing to do. + def parse_file(self, id: str, path: str, source: str) -> MypyFile: + """Parse the source of a file with the given name. + + Raise CompileError if there is a parse error. + """ + num_errs = self.errors.num_messages() + tree = parse(source, path, self.errors, + pyversion=self.pyversion, + custom_typing_module=self.custom_typing_module, + fast_parser=FAST_PARSER in self.flags) + tree._fullname = id + if self.errors.num_messages() != num_errs: + self.log("Bailing due to parse errors") + self.errors.raise_error() + return tree + + def module_not_found(self, path: str, line: int, id: str) -> None: + self.errors.set_file(path) + stub_msg = "(Stub files are from https://github.com/python/typeshed)" + if ((self.pyversion[0] == 2 and moduleinfo.is_py2_std_lib_module(id)) or + (self.pyversion[0] >= 3 and moduleinfo.is_py3_std_lib_module(id))): + self.errors.report( + line, "No library stub file for standard library module '{}'".format(id)) + self.errors.report(line, stub_msg, severity='note', only_once=True) + elif moduleinfo.is_third_party_module(id): + self.errors.report(line, "No library stub file for module '{}'".format(id)) + self.errors.report(line, stub_msg, severity='note', only_once=True) else: - raise RuntimeError('Unsupported target %d' % self.target) + self.errors.report(line, "Cannot find module named '{}'".format(id)) + self.errors.report(line, "(Perhaps setting MYPYPATH would help)", severity='note', + only_once=True) def report_file(self, file: MypyFile) -> None: if self.source_set.is_source(file): self.reports.file(file, type_map=self.type_checker.type_map) - def log(self, message: str) -> None: + def log(self, *message: str) -> None: if VERBOSE in self.flags: - print('LOG:', message, file=sys.stderr) + print('%.3f:LOG: ' % (time.time() - self.start_time), *message, file=sys.stderr) + sys.stderr.flush() - def trace(self, message: str) -> None: + def trace(self, *message: str) -> None: if self.flags.count(VERBOSE) >= 2: - print('TRACE:', message, file=sys.stderr) + print('%.3f:TRACE:' % (time.time() - self.start_time), *message, file=sys.stderr) + sys.stderr.flush() def remove_cwd_prefix_from_path(p: str) -> str: @@ -637,7 +487,9 @@ def remove_cwd_prefix_from_path(p: str) -> str: if basename(cur) != '': cur += os.sep # Compute root path. - while p and os.path.isfile(os.path.join(p, '__init__.py')): + while (p and + (os.path.isfile(os.path.join(p, '__init__.py')) or + os.path.isfile(os.path.join(p, '__init__.pyi')))): dir, base = os.path.split(p) if not base: break @@ -651,373 +503,6 @@ def remove_cwd_prefix_from_path(p: str) -> str: return p -class StateInfo: - """Description of a source file that is being built.""" - - def __init__(self, path: str, id: str, - import_context: List[Tuple[str, int]], - manager: BuildManager) -> None: - """Initialize state information. 
- - Arguments: - path: Path to the file - id: Module id, such as 'os.path' or '__main__' (for the main - program file) - import_context: - The import trail that caused this module to be - imported (path, line) tuples - manager: The manager that manages this build - """ - self.path = path - self.id = id - self.import_context = import_context - self.manager = manager - - -class State: - """Abstract base class for build states. - - There is always at most one state per source file. - """ - - # The StateInfo attributes are duplicated here for convenience. - path = '' - id = '' - import_context = None # type: List[Tuple[str, int]] - manager = None # type: BuildManager - # Modules that this file directly depends on (in no particular order). - dependencies = None # type: List[str] - - def __init__(self, info: StateInfo) -> None: - self.path = info.path - self.id = info.id - self.import_context = info.import_context - self.manager = info.manager - self.dependencies = [] - - def info(self) -> StateInfo: - return StateInfo(self.path, self.id, self.import_context, self.manager) - - def process(self) -> None: - raise RuntimeError('Not implemented') - - def is_ready(self) -> bool: - """Return True if all dependencies are at least in the same state - as this object (but not in the initial state). - """ - for module in self.dependencies: - state = self.manager.module_state(module) - if earlier_state(state, - self.state()) or state == UNPROCESSED_STATE: - return False - return True - - def num_incomplete_deps(self) -> int: - """Return the number of dependencies that are ready but incomplete.""" - return 0 # Does not matter in this state - - def state(self) -> int: - raise RuntimeError('Not implemented') - - def switch_state(self, state_object: 'State') -> None: - """Called by state objects to replace the state of the file. - - Also notify the manager. - """ - for i in range(len(self.manager.states)): - if self.manager.states[i].path == state_object.path: - self.manager.states[i] = state_object - return - raise RuntimeError('State for {} not found'.format(state_object.path)) - - def errors(self) -> Errors: - return self.manager.errors - - def semantic_analyzer(self) -> SemanticAnalyzer: - return self.manager.semantic_analyzer - - def semantic_analyzer_pass3(self) -> ThirdPass: - return self.manager.semantic_analyzer_pass3 - - def type_checker(self) -> TypeChecker: - return self.manager.type_checker - - def fail(self, path: str, line: int, msg: str, blocker: bool = True) -> None: - """Report an error in the build (e.g. 
if could not find a module).""" - self.errors().set_file(path) - self.errors().report(line, msg, blocker=blocker) - - def module_not_found(self, path: str, line: int, id: str) -> None: - self.errors().set_file(path) - stub_msg = "(Stub files are from https://github.com/python/typeshed)" - if ((self.manager.pyversion[0] == 2 and moduleinfo.is_py2_std_lib_module(id)) or - (self.manager.pyversion[0] >= 3 and moduleinfo.is_py3_std_lib_module(id))): - self.errors().report( - line, "No library stub file for standard library module '{}'".format(id)) - self.errors().report(line, stub_msg, severity='note', only_once=True) - elif moduleinfo.is_third_party_module(id): - self.errors().report(line, "No library stub file for module '{}'".format(id)) - self.errors().report(line, stub_msg, severity='note', only_once=True) - else: - self.errors().report(line, "Cannot find module named '{}'".format(id)) - self.errors().report(line, "(Perhaps setting MYPYPATH would help)", severity='note', - only_once=True) - - -class UnprocessedFile(State): - def __init__(self, info: StateInfo, program_text: str) -> None: - super().__init__(info) - self.program_text = program_text - self.silent = SILENT_IMPORTS in self.manager.flags - - def load_dependencies(self): - # Add surrounding package(s) as dependencies. - for p in super_packages(self.id): - if p in self.manager.missing_modules: - continue - if not self.import_module(p): - # Could not find a module. Typically the reason is a - # misspelled module name, missing stub, module not in - # search path or the module has not been installed. - if self.silent: - self.manager.missing_modules.add(p) - else: - self.module_not_found(self.path, 1, p) - else: - self.dependencies.append(p) - - def process(self) -> None: - """Parse the file, store global names and advance to the next state.""" - if self.id in self.manager.semantic_analyzer.modules: - self.fail(self.path, 1, "Duplicate module named '{}'".format(self.id)) - return - - tree = self.parse(self.program_text, self.path) - - # Store the parsed module in the shared module symbol table. - self.manager.semantic_analyzer.modules[self.id] = tree - - if '.' in self.id: - # Include module in the symbol table of the enclosing package. - c = self.id.split('.') - p = '.'.join(c[:-1]) - sem_anal = self.manager.semantic_analyzer - if p in sem_anal.modules: - sem_anal.modules[p].names[c[-1]] = SymbolTableNode( - MODULE_REF, tree, p) - - if self.id != 'builtins': - # The builtins module is imported implicitly in every program (it - # contains definitions of int, print etc.). - self.manager.trace('import builtins') - if not self.import_module('builtins'): - self.fail(self.path, 1, 'Could not find builtins') - - # Do the first pass of semantic analysis: add top-level definitions in - # the file to the symbol table. We must do this before processing imports, - # since this may mark some import statements as unreachable. - first = FirstPass(self.semantic_analyzer()) - first.analyze(tree, self.path, self.id) - - # Add all directly imported modules to be processed (however they are - # not processed yet, just waiting to be processed). - for id, line in self.manager.all_imported_modules_in_file(tree): - self.errors().push_import_context(self.path, line) - try: - res = self.import_module(id) - finally: - self.errors().pop_import_context() - if not res: - if id == '': - # Must be from a relative import. 
- self.fail(self.path, line, - "No parent module -- cannot perform relative import".format(id), - blocker=True) - else: - if (line not in tree.ignored_lines and - 'import' not in tree.weak_opts and - not self.silent): - self.module_not_found(self.path, line, id) - self.manager.missing_modules.add(id) - - # Initialize module symbol table, which was populated by the semantic - # analyzer. - tree.names = self.semantic_analyzer().globals - - # Replace this state object with a parsed state in BuildManager. - self.switch_state(ParsedFile(self.info(), tree)) - - def import_module(self, id: str) -> bool: - """Schedule a module to be processed. - - Add an unprocessed state object corresponding to the module to the - manager, or do nothing if the module already has a state object. - """ - if self.manager.has_module(id): - # Do nothing: already being compiled. - return True - - if id == 'builtins' and self.manager.pyversion[0] == 2: - # The __builtin__ module is called internally by mypy 'builtins' in Python 2 mode - # (similar to Python 3), but the stub file is __builtin__.pyi. The reason is that - # a lot of code hard codes 'builtins.x' and this it's easier to work it around like - # this. It also means that the implementation can mostly ignore the difference and - # just assume 'builtins' everywhere, which simplifies code. - file_id = '__builtin__' - else: - file_id = id - path, text = read_module_source_from_file(file_id, self.manager.lib_path, - self.manager.pyversion, self.silent) - if text is not None: - info = StateInfo(path, id, self.errors().import_context(), - self.manager) - new_file = UnprocessedFile(info, text) - self.manager.states.append(new_file) - self.manager.module_files[id] = path - new_file.load_dependencies() - return True - else: - return False - - def parse(self, source_text: Union[str, bytes], fnam: str) -> MypyFile: - """Parse the source of a file with the given name. - - Raise CompileError if there is a parse error. - """ - num_errs = self.errors().num_messages() - tree = parse.parse(source_text, fnam, self.errors(), - pyversion=self.manager.pyversion, - custom_typing_module=self.manager.custom_typing_module, - fast_parser=FAST_PARSER in self.manager.flags) - tree._fullname = self.id - if self.errors().num_messages() != num_errs: - self.errors().raise_error() - return tree - - def state(self) -> int: - return UNPROCESSED_STATE - - -class ParsedFile(State): - tree = None # type: MypyFile - - def __init__(self, info: StateInfo, tree: MypyFile) -> None: - super().__init__(info) - self.tree = tree - - # Build a list all directly imported moules (dependencies). - imp = [] # type: List[str] - for id, line in self.manager.all_imported_modules_in_file(tree): - # Omit missing modules, as otherwise we could not type check - # programs with missing modules. - if id not in self.manager.missing_modules and id != self.id: - imp.append(id) - if self.id != 'builtins': - imp.append('builtins') - - if imp != []: - self.manager.trace('{} dependencies: {}'.format(info.path, imp)) - - # Record the dependencies. Note that the dependencies list also - # contains any superpackages and we must preserve them (e.g. os for - # os.path). 
- self.dependencies.extend(imp) - - def process(self) -> None: - """Semantically analyze file and advance to the next state.""" - self.semantic_analyzer().visit_file(self.tree, self.tree.path) - self.switch_state(PartiallySemanticallyAnalyzedFile(self.info(), - self.tree)) - - def num_incomplete_deps(self) -> int: - """Return the number of dependencies that are incomplete. - - Here complete means that their state is *later* than this module. - Cyclic dependencies are omitted to break cycles forcibly (and somewhat - arbitrarily). - """ - incomplete = 0 - for module in self.dependencies: - state = self.manager.module_state(module) - if (not earlier_state(self.state(), state) and - not self.manager.is_dep(module, self.id)): - incomplete += 1 - return incomplete - - def state(self) -> int: - return PARSED_STATE - - -class PartiallySemanticallyAnalyzedFile(ParsedFile): - def process(self) -> None: - """Perform final pass of semantic analysis and advance state.""" - self.semantic_analyzer_pass3().visit_file(self.tree, self.tree.path) - if DUMP_TYPE_STATS in self.manager.flags: - stats.dump_type_stats(self.tree, self.tree.path) - self.switch_state(SemanticallyAnalyzedFile(self.info(), self.tree)) - - def state(self) -> int: - return PARTIAL_SEMANTIC_ANALYSIS_STATE - - -class SemanticallyAnalyzedFile(ParsedFile): - def process(self) -> None: - """Type check file and advance to the next state.""" - if self.manager.target >= TYPE_CHECK: - self.type_checker().visit_file(self.tree, self.tree.path) - if DUMP_INFER_STATS in self.manager.flags: - stats.dump_type_stats(self.tree, self.tree.path, inferred=True, - typemap=self.manager.type_checker.type_map) - self.manager.report_file(self.tree) - - # FIX remove from active state list to speed up processing - - self.switch_state(TypeCheckedFile(self.info(), self.tree)) - - def state(self) -> int: - return SEMANTICALLY_ANALYSED_STATE - - -class TypeCheckedFile(SemanticallyAnalyzedFile): - def process(self) -> None: - """Finished, so cannot process.""" - raise RuntimeError('Cannot process TypeCheckedFile') - - def is_ready(self) -> bool: - """Finished, so cannot ever become ready.""" - return False - - def state(self) -> int: - return TYPE_CHECKED_STATE - - -def read_module_source_from_file(id: str, - lib_path: Iterable[str], - pyversion: Tuple[int, int], - silent: bool) -> Tuple[Optional[str], Optional[str]]: - """Find and read the source file of a module. - - Return a pair (path, file contents). Return (None, None) if the module - could not be found or read. - - Args: - id: module name, a string of form 'foo' or 'foo.bar' - lib_path: library search path - silent: if set, don't import .py files (only .pyi files) - """ - path = find_module(id, lib_path) - if path is not None: - if silent and not path.endswith('.pyi'): - return None, None - try: - text = read_with_python_encoding(path, pyversion) - except IOError: - return None, None - return path, text - else: - return None, None - - # Cache find_module: (id, lib_path) -> result. find_module_cache = {} # type: Dict[Tuple[str, Tuple[str, ...]], str] @@ -1121,23 +606,6 @@ def verify_module(id: str, path: str) -> bool: return True -def super_packages(id: str) -> List[str]: - """Return the surrounding packages of a module, e.g. 
['os'] for os.path.""" - c = id.split('.') - res = [] # type: List[str] - for i in range(1, len(c)): - res.append('.'.join(c[:i])) - return res - - -def make_parent_dirs(path: str) -> None: - parent = os.path.dirname(path) - try: - os.makedirs(parent) - except OSError: - pass - - def read_with_python_encoding(path: str, pyversion: Tuple[int, int]) -> str: """Read the Python file with while obeying PEP-263 encoding detection""" source_bytearray = bytearray() @@ -1161,3 +629,836 @@ def read_with_python_encoding(path: str, pyversion: Tuple[int, int]) -> str: source_bytearray.extend(f.read()) return source_bytearray.decode(encoding) + + +MYPY_CACHE = '.mypy_cache' + + +def get_cache_names(id: str, path: str, pyversion: Tuple[int, int]) -> Tuple[str, str]: + """Return the file names for the cache files. + + Args: + id: module ID + path: module path (used to recognize packages) + pyversion: Python version (major, minor) + + Returns: + A tuple with the file names to be used for the meta JSON and the + data JSON, respectively. + """ + prefix = os.path.join(MYPY_CACHE, '%d.%d' % pyversion, *id.split('.')) + is_package = os.path.basename(path).startswith('__init__.py') + if is_package: + prefix = os.path.join(prefix, '__init__') + return (prefix + '.meta.json', prefix + '.data.json') + + +def find_cache_meta(id: str, path: str, manager: BuildManager) -> Optional[CacheMeta]: + """Find cache data for a module. + + Args: + id: module ID + path: module path + manager: the build manager (for pyversion, log/trace, and build options) + + Returns: + A CacheMeta instance if the cache data was found and appears + valid; otherwise None. + """ + # TODO: May need to take more build options into account; in + # particular SILENT_IMPORTS may affect the cache dramatically. + meta_json, data_json = get_cache_names(id, path, manager.pyversion) + manager.trace('Looking for {} {}'.format(id, data_json)) + if not os.path.exists(meta_json): + return None + with open(meta_json, 'r') as f: + meta_str = f.read() + manager.trace('Meta {} {}'.format(id, meta_str.rstrip())) + meta = json.loads(meta_str) # TODO: Errors + if not isinstance(meta, dict): + return None + path = os.path.abspath(path) + m = CacheMeta( + meta.get('id'), + meta.get('path'), + meta.get('mtime'), + meta.get('size'), + meta.get('dependencies'), + meta.get('data_mtime'), + data_json, + ) + if (m.id != id or m.path != path or + m.mtime is None or m.size is None or + m.dependencies is None or m.data_mtime is None): + return None + # TODO: Share stat() outcome with find_module() + st = os.stat(path) # TODO: Errors + if st.st_mtime != m.mtime or st.st_size != m.size: + manager.log('Metadata abandoned because of modified file {}'.format(path)) + return None + # It's a match on (id, path, mtime, size). + # Check data_json; assume if its mtime matches it's good. + # TODO: stat() errors + if os.path.getmtime(data_json) != m.data_mtime: + return None + manager.log('Found {} {}'.format(id, meta_json)) + return m + + +def random_string(): + return binascii.hexlify(os.urandom(8)).decode('ascii') + + +def write_cache(id: str, path: str, tree: MypyFile, dependencies: List[str], + manager: BuildManager) -> None: + """Write cache files for a module. 
+ + Args: + id: module ID + path: module path + tree: the fully checked module data + dependencies: module IDs on which this module depends + manager: the build manager (for pyversion, log/trace) + """ + path = os.path.abspath(path) + manager.trace('Dumping {} {}'.format(id, path)) + st = os.stat(path) # TODO: Errors + mtime = st.st_mtime + size = st.st_size + meta_json, data_json = get_cache_names(id, path, manager.pyversion) + manager.log('Writing {} {} {}'.format(id, meta_json, data_json)) + data = tree.serialize() + parent = os.path.dirname(data_json) + if not os.path.isdir(parent): + os.makedirs(parent) + assert os.path.dirname(meta_json) == parent + nonce = '.' + random_string() + data_json_tmp = data_json + nonce + meta_json_tmp = meta_json + nonce + with open(data_json_tmp, 'w') as f: + json.dump(data, f, indent=2, sort_keys=True) + f.write('\n') + data_mtime = os.path.getmtime(data_json_tmp) + meta = {'id': id, + 'path': path, + 'mtime': mtime, + 'size': size, + 'data_mtime': data_mtime, + 'dependencies': dependencies, + } + with open(meta_json_tmp, 'w') as f: + json.dump(meta, f, sort_keys=True) + f.write('\n') + # TODO: On Windows, os.rename() may not be atomic, and we could + # use os.replace(). However that's new in Python 3.3. + os.rename(data_json_tmp, data_json) + os.rename(meta_json_tmp, meta_json) + + +"""Dependency manager. + +Design +====== + +Ideally +------- + +A. Collapse cycles (each SCC -- strongly connected component -- + becomes one "supernode"). + +B. Topologically sort nodes based on dependencies. + +C. Process from leaves towards roots. + +Wrinkles +-------- + +a. Need to parse source modules to determine dependencies. + +b. Processing order for modules within an SCC. + +c. Must order mtimes of files to decide whether to re-process; depends + on clock never resetting. + +d. from P import M; checks filesystem whether module P.M exists in + filesystem. + +e. Race conditions, where somebody modifies a file while we're + processing. I propose not to modify the algorithm to handle this, + but to detect when this could lead to inconsistencies. (For + example, when we decide on the dependencies based on cache + metadata, and then we decide to re-parse a file because of a stale + dependency, if the re-parsing leads to a different list of + dependencies we should warn the user or start over.) + +Steps +----- + +1. For each explicitly given module find the source file location. + +2. For each such module load and check the cache metadata, and decide + whether it's valid. + +3. Now recursively (or iteratively) find dependencies and add those to + the graph: + + - for cached nodes use the list of dependencies from the cache + metadata (this will be valid even if we later end up re-parsing + the same source); + + - for uncached nodes parse the file and process all imports found, + taking care of (a) above. + +Step 3 should also address (d) above. + +Once step 3 terminates we have the entire dependency graph, and for +each module we've either loaded the cache metadata or parsed the +source code. (However, we may still need to parse those modules for +which we have cache metadata but that depend, directly or indirectly, +on at least one module for which the cache metadata is stale.) + +Now we can execute steps A-C from the first section. Finding SCCs for +step A shouldn't be hard; there's a recipe here: +http://code.activestate.com/recipes/578507/. There's also a plethora +of topsort recipes, e.g. http://code.activestate.com/recipes/577413/. 
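+
+As a toy illustration of steps A and B, using the helpers defined at
+the end of this module (the module names are made up):
+
+    vertices = {'a', 'b', 'c'}
+    edges = {'a': ['b', 'c'], 'b': ['a'], 'c': []}  # a and b form a cycle
+    list(strongly_connected_components(vertices, edges))
+    # -> [{'c'}, {'a', 'b'}]
+
+The cycle {a, b} is collapsed into one supernode, and the topological
+sort then orders {c} before {a, b}, so the leaf is processed first.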
+ +For single nodes, processing is simple. If the node was cached, we +deserialize the cache data and fix up cross-references. Otherwise, we +do semantic analysis followed by type checking. We also handle (c) +above; if a module has valid cache data *but* any of its +dependendencies was processed from source, then the module should be +processed from source. + +A relatively simple optimization (outside SCCs) we might do in the +future is as follows: if a node's cache data is valid, but one or more +of its dependencies are out of date so we have to re-parse the node +from source, once we have fully type-checked the node, we can decide +whether its symbol table actually changed compared to the cache data +(by reading the cache data and comparing it to the data we would be +writing). If there is no change we can declare the node up to date, +and any node that depends (and for which we have cached data, and +whose other dependencies are up to date) on it won't need to be +re-parsed from source. + +Import cycles +------------- + +Finally we have to decide how to handle (c), import cycles. Here +we'll need a modified version of the original state machine +(build.py), but we only need to do this per SCC, and we won't have to +deal with changes to the list of nodes while we're processing it. + +If all nodes in the SCC have valid cache metadata and all dependencies +outside the SCC are still valid, we can proceed as follows: + + 1. Load cache data for all nodes in the SCC. + + 2. Fix up cross-references for all nodes in the SCC. + +Otherwise, the simplest (but potentially slow) way to proceed is to +invalidate all cache data in the SCC and re-parse all nodes in the SCC +from source. We can do this as follows: + + 1. Parse source for all nodes in the SCC. + + 2. Semantic analysis for all nodes in the SCC. + + 3. Type check all nodes in the SCC. + +(If there are more passes the process is the same -- each pass should +be done for all nodes before starting the next pass for any nodes in +the SCC.) + +We could process the nodes in the SCC in any order. For sentimental +reasons, I've decided to process them in the reverse order in which we +encountered them when originally constructing the graph. That's how +the old build.py deals with cycles, and at least this reproduces the +previous implementation more accurately. + +Can we do better than re-parsing all nodes in the SCC when any of its +dependencies are out of date? It's doubtful. The optimization +mentioned at the end of the previous section would require re-parsing +and type-checking a node and then comparing its symbol table to the +cached data; but because the node is part of a cycle we can't +technically type-check it until the semantic analysis of all other +nodes in the cycle has completed. (This is an important issue because +Dropbox has a very large cycle in production code. But I'd like to +deal with it later.) + +Additional wrinkles +------------------- + +During implementation more wrinkles were found. + +- When a submodule of a package (e.g. x.y) is encountered, the parent + package (e.g. x) must also be loaded, but it is not strictly a + dependency. See State.add_ancestors() below. +""" + + +class ModuleNotFound(Exception): + """Control flow exception to signal that a module was not found.""" + + +class State: + """The state for a module. + + The source is only used for the -c command line option; in that + case path is None. Otherwise source is None and path isn't. 
+ """ + + manager = None # type: BuildManager + order_counter = 0 # Class variable + order = None # type: int # Order in which modules were encountered + id = None # type: str # Fully qualified module name + path = None # type: Optional[str] # Path to module source + xpath = None # type: str # Path or '' + source = None # type: Optional[str] # Module source code + meta = None # type: Optional[CacheMeta] + data = None # type: Optional[str] + tree = None # type: Optional[MypyFile] + dependencies = None # type: List[str] + + # Map each dependency to the line number where it is first imported + dep_line_map = None # type: Dict[str, int] + + # Parent package, its parent, etc. + ancestors = None # type: Optional[List[str]] + + # List of (path, line number) tuples giving context for import + import_context = None # type: List[Tuple[str, int]] + + # The State from which this module was imported, if any + caller_state = None # type: Optional[State] + + # If caller_state is set, the line number in the caller where the import occurred + caller_line = 0 + + def __init__(self, + id: Optional[str], + path: Optional[str], + source: Optional[str], + manager: BuildManager, + caller_state: 'State' = None, + caller_line: int = 0, + is_ancestor: bool = False, + ) -> None: + assert id or path or source is not None, "Neither id, path nor source given" + self.manager = manager + State.order_counter += 1 + self.order = State.order_counter + self.caller_state = caller_state + self.caller_line = caller_line + if caller_state: + self.import_context = caller_state.import_context[:] + self.import_context.append((caller_state.xpath, caller_line)) + else: + self.import_context = [] + self.id = id or '__main__' + if not path and source is None: + file_id = id + if id == 'builtins' and manager.pyversion[0] == 2: + # The __builtin__ module is called internally by mypy + # 'builtins' in Python 2 mode (similar to Python 3), + # but the stub file is __builtin__.pyi. The reason is + # that a lot of code hard-codes 'builtins.x' and it's + # easier to work it around like this. It also means + # that the implementation can mostly ignore the + # difference and just assume 'builtins' everywhere, + # which simplifies code. + file_id = '__builtin__' + path = find_module(file_id, manager.lib_path) + if path: + # In silent mode, don't import .py files. + if (SILENT_IMPORTS in manager.flags and + path.endswith('.py') and (caller_state or is_ancestor)): + path = None + manager.missing_modules.add(id) + raise ModuleNotFound + else: + # Could not find a module. Typically the reason is a + # misspelled module name, missing stub, module not in + # search path or the module has not been installed. + if caller_state: + if not (SILENT_IMPORTS in manager.flags or + (caller_state.tree is not None and + (caller_line in caller_state.tree.ignored_lines or + 'import' in caller_state.tree.weak_opts))): + save_import_context = manager.errors.import_context() + manager.errors.set_import_context(caller_state.import_context) + manager.module_not_found(caller_state.xpath, caller_line, id) + manager.errors.set_import_context(save_import_context) + manager.missing_modules.add(id) + raise ModuleNotFound + else: + # If we can't find a root source it's always fatal. + # TODO: This might hide non-fatal errors from + # root sources processed earlier. 
+ raise CompileError(["mypy: can't find module '%s'" % id]) + self.path = path + self.xpath = path or '' + self.source = source + if path and source is None and INCREMENTAL in manager.flags: + self.meta = find_cache_meta(self.id, self.path, manager) + # TODO: Get mtime if not cached. + self.add_ancestors() + if self.meta: + self.dependencies = self.meta.dependencies + self.dep_line_map = {} + else: + # Parse the file (and then some) to get the dependencies. + self.parse_file() + + def add_ancestors(self) -> None: + # All parent packages are new ancestors. + ancestors = [] + parent = self.id + while '.' in parent: + parent, _ = parent.rsplit('.', 1) + ancestors.append(parent) + self.ancestors = ancestors + + def is_fresh(self) -> bool: + """Return whether the cache data for this file is fresh.""" + return self.meta is not None + + def clear_fresh(self) -> None: + """Throw away the cache data for this file, marking it as stale.""" + self.meta = None + + def check_blockers(self) -> None: + """Raise CompileError if a blocking error is detected.""" + if self.manager.errors.is_blockers(): + self.manager.log("Bailing due to blocking errors") + self.manager.errors.raise_error() + + @contextlib.contextmanager + def wrap_context(self) -> Iterator[None]: + save_import_context = self.manager.errors.import_context() + self.manager.errors.set_import_context(self.import_context) + yield + self.manager.errors.set_import_context(save_import_context) + self.check_blockers() + + # Methods for processing cached modules. + + def load_tree(self) -> None: + with open(self.meta.data_json) as f: + data = json.load(f) + # TODO: Assert data file wasn't changed. + self.tree = MypyFile.deserialize(data) + self.manager.modules[self.id] = self.tree + + def fix_cross_refs(self) -> None: + fixup_module_pass_one(self.tree, self.manager.modules) + + def calculate_mros(self) -> None: + fixup_module_pass_two(self.tree, self.manager.modules) + + # Methods for processing modules from source code. + + def parse_file(self) -> None: + if self.tree is not None: + # The file was already parsed (in __init__()). + return + + manager = self.manager + modules = manager.modules + manager.log("Parsing %s" % self.xpath) + + with self.wrap_context(): + source = self.source + self.source = None # We won't need it again. + if self.path and source is None: + try: + source = read_with_python_encoding(self.path, manager.pyversion) + except IOError as ioerr: + raise CompileError([ + "mypy: can't read file '{}': {}".format(self.path, ioerr.strerror)]) + except UnicodeDecodeError as decodeerr: + raise CompileError([ + "mypy: can't decode file '{}': {}".format(self.path, str(decodeerr))]) + self.tree = manager.parse_file(self.id, self.xpath, source) + + modules[self.id] = self.tree + + # Do the first pass of semantic analysis: add top-level + # definitions in the file to the symbol table. We must do + # this before processing imports, since this may mark some + # import statements as unreachable. + first = FirstPass(manager.semantic_analyzer) + first.analyze(self.tree, self.xpath, self.id) + + # Initialize module symbol table, which was populated by the + # semantic analyzer. + # TODO: Why can't FirstPass .analyze() do this? + self.tree.names = manager.semantic_analyzer.globals + + # Compute (direct) dependencies. + # Add all direct imports (this is why we needed the first pass). + # Also keep track of each dependency's source line. 
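+        # (The line numbers recorded in dep_line_map are used by
+        # load_graph() below as caller_line, so that a dependency that
+        # cannot be found is reported on the import that pulled it in.)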
+ dependencies = [] + dep_line_map = {} # type: Dict[str, int] # id -> line + for id, line in manager.all_imported_modules_in_file(self.tree): + # Omit missing modules, as otherwise we could not type-check + # programs with missing modules. + if id == self.id or id in manager.missing_modules: + continue + if id == '': + # Must be from a relative import. + manager.errors.set_file(self.xpath) + manager.errors.report(line, "No parent module -- cannot perform relative import", + blocker=True) + if id not in dep_line_map: + dependencies.append(id) + dep_line_map[id] = line + # Every module implicitly depends on builtins. + if self.id != 'builtins' and 'builtins' not in dep_line_map: + dependencies.append('builtins') + + # If self.dependencies is already set, it was read from the + # cache, but for some reason we're re-parsing the file. + # Double-check that the dependencies still match (otherwise + # the graph is out of date). + if self.dependencies is not None and dependencies != self.dependencies: + # Presumably the file was edited while we were running. + # TODO: Make this into a reasonable error message, or recover somehow. + print("HELP!! Dependencies changed!") + print(" Cached:", self.dependencies) + print(" Source:", dependencies) + assert False, "Cache inconsistency for dependencies of %s" % (self.id,) + self.dependencies = dependencies + self.dep_line_map = dep_line_map + self.check_blockers() + + def patch_parent(self) -> None: + # Include module in the symbol table of the enclosing package. + if '.' not in self.id: + return + manager = self.manager + modules = manager.modules + parent, child = self.id.rsplit('.', 1) + if parent in modules: + manager.trace("Added %s.%s" % (parent, child)) + modules[parent].names[child] = SymbolTableNode(MODULE_REF, self.tree, parent) + else: + manager.log("Hm... couldn't add %s.%s" % (parent, child)) + + def semantic_analysis(self) -> None: + with self.wrap_context(): + self.manager.semantic_analyzer.visit_file(self.tree, self.xpath) + + def semantic_analysis_pass_three(self) -> None: + with self.wrap_context(): + self.manager.semantic_analyzer_pass3.visit_file(self.tree, self.xpath) + if DUMP_TYPE_STATS in self.manager.flags: + dump_type_stats(self.tree, self.xpath) + + def type_check(self) -> None: + manager = self.manager + if manager.target < TYPE_CHECK: + return + with self.wrap_context(): + manager.type_checker.visit_file(self.tree, self.xpath) + if DUMP_INFER_STATS in manager.flags: + dump_type_stats(self.tree, self.xpath, inferred=True, + typemap=manager.type_checker.type_map) + manager.report_file(self.tree) + + def write_cache(self) -> None: + if self.path and INCREMENTAL in self.manager.flags and not self.manager.errors.is_errors(): + write_cache(self.id, self.path, self.tree, list(self.dependencies), self.manager) + + +Graph = Dict[str, State] + + +def dispatch(sources: List[BuildSource], manager: BuildManager) -> None: + manager.log("Using new dependency manager") + graph = load_graph(sources, manager) + manager.log("Loaded graph with %d nodes" % len(graph)) + process_graph(graph, manager) + + +def load_graph(sources: List[BuildSource], manager: BuildManager) -> Graph: + """Given some source files, load the full dependency graph.""" + graph = {} # type: Graph + # The deque is used to implement breadth-first traversal. + # TODO: Consider whether to go depth-first instead. This may + # affect the order in which we process files within import cycles. 
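+    # (State.order records the encounter order, and process_graph()
+    # uses it to decide the processing order within an SCC, so the
+    # traversal strategy chosen here matters for cycles.)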
+ new = collections.deque() # type: collections.deque[State] + # Seed the graph with the initial root sources. + for bs in sources: + try: + st = State(id=bs.module, path=bs.path, source=bs.text, manager=manager) + except ModuleNotFound: + continue + if st.id in graph: + manager.errors.set_file(st.xpath) + manager.errors.report(1, "Duplicate module named '%s'" % st.id) + manager.errors.raise_error() + graph[st.id] = st + new.append(st) + # Collect dependencies. We go breadth-first. + while new: + st = new.popleft() + for dep in st.ancestors + st.dependencies: + if dep not in graph: + try: + if dep in st.ancestors: + # TODO: Why not 'if dep not in st.dependencies' ? + # Ancestors don't have import context. + newst = State(id=dep, path=None, source=None, manager=manager, + is_ancestor=True) + else: + newst = State(id=dep, path=None, source=None, manager=manager, + caller_state=st, caller_line=st.dep_line_map.get(dep, 1)) + except ModuleNotFound: + if dep in st.dependencies: + st.dependencies.remove(dep) + else: + assert newst.id not in graph, newst.id + graph[newst.id] = newst + new.append(newst) + return graph + + +def process_graph(graph: Graph, manager: BuildManager) -> None: + """Process everything in dependency order.""" + sccs = sorted_components(graph) + manager.log("Found %d SCCs; largest has %d nodes" % + (len(sccs), max(len(scc) for scc in sccs))) + # We're processing SCCs from leaves (those without further + # dependencies) to roots (those from which everything else can be + # reached). + for ascc in sccs: + # Sort the SCC's nodes in *reverse* order or encounter. + # This is a heuristic for handling import cycles. + # Note that ascc is a set, and scc is a list. + scc = sorted(ascc, key=lambda id: -graph[id].order) + # If builtins is in the list, move it last. (This is a bit of + # a hack, but it's necessary because the builtins module is + # part of a small cycle involving at least {builtins, abc, + # typing}. Of these, builtins must be processed last or else + # some builtin objects will be incompletely processed.) + if 'builtins' in ascc: + scc.remove('builtins') + scc.append('builtins') + # Because the SCCs are presented in topological sort order, we + # don't need to look at dependencies recursively for staleness + # -- the immediate dependencies are sufficient. + stale_scc = {id for id in scc if not graph[id].is_fresh()} + fresh = not stale_scc + deps = set() + for id in scc: + deps.update(graph[id].dependencies) + deps -= ascc + stale_deps = {id for id in deps if not graph[id].is_fresh()} + fresh = fresh and not stale_deps + if fresh: + # All cache files are fresh. Check that no dependency's + # cache file is newer than any scc node's cache file. + oldest_in_scc = min(graph[id].meta.data_mtime for id in scc) + newest_in_deps = 0 if not deps else max(graph[dep].meta.data_mtime for dep in deps) + if manager.flags.count(VERBOSE) >= 2: # Dump all mtimes for extreme debugging. + all_ids = sorted(ascc | deps, key=lambda id: graph[id].meta.data_mtime) + for id in all_ids: + if id in scc: + if graph[id].meta.data_mtime < newest_in_deps: + key = "*id:" + else: + key = "id:" + else: + if graph[id].meta.data_mtime > oldest_in_scc: + key = "+dep:" + else: + key = "dep:" + manager.trace(" %5s %.0f %s" % (key, graph[id].meta.data_mtime, id)) + # If equal, give the benefit of the doubt, due to 1-sec time granularity + # (on some platforms). 
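+            # (A dependency cache written in the same second as the oldest
+            # cache file in this SCC therefore does not mark the SCC stale.)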
+ if oldest_in_scc < newest_in_deps: + fresh = False + fresh_msg = "out of date by %.0f seconds" % (newest_in_deps - oldest_in_scc) + else: + fresh_msg = "fresh" + elif stale_scc: + fresh_msg = "inherently stale (%s)" % " ".join(sorted(stale_scc)) + if stale_deps: + fresh_msg += " with stale deps (%s)" % " ".join(sorted(stale_deps)) + else: + fresh_msg = "stale due to deps (%s)" % " ".join(sorted(stale_deps)) + if len(scc) == 1: + manager.log("Processing SCC sigleton (%s) as %s" % (" ".join(scc), fresh_msg)) + else: + manager.log("Processing SCC of size %d (%s) as %s" % + (len(scc), " ".join(scc), fresh_msg)) + if fresh: + process_fresh_scc(graph, scc) + else: + process_stale_scc(graph, scc) + + +def process_fresh_scc(graph: Graph, scc: List[str]) -> None: + """Process the modules in one SCC from their cached data.""" + for id in scc: + graph[id].load_tree() + for id in scc: + graph[id].patch_parent() + for id in scc: + graph[id].fix_cross_refs() + for id in scc: + graph[id].calculate_mros() + + +def process_stale_scc(graph: Graph, scc: List[str]) -> None: + """Process the modules in one SCC from source code.""" + for id in scc: + graph[id].clear_fresh() + for id in scc: + # We may already have parsed the module, or not. + # If the former, parse_file() is a no-op. + graph[id].parse_file() + for id in scc: + graph[id].patch_parent() + for id in scc: + graph[id].semantic_analysis() + for id in scc: + graph[id].semantic_analysis_pass_three() + for id in scc: + graph[id].type_check() + graph[id].write_cache() + + +def sorted_components(graph: Graph) -> List[AbstractSet[str]]: + """Return the graph's SCCs, topologically sorted by dependencies. + + The sort order is from leaves (nodes without dependencies) to + roots (nodes on which no other nodes depend). + + This works for a subset of the full dependency graph too; + dependencies that aren't present in graph.keys() are ignored. + """ + # Compute SCCs. + vertices = set(graph) + edges = {id: [dep for dep in st.dependencies if dep in graph] + for id, st in graph.items()} + sccs = list(strongly_connected_components(vertices, edges)) + # Topsort. + sccsmap = {id: frozenset(scc) for scc in sccs for id in scc} + data = {} # type: Dict[AbstractSet[str], Set[AbstractSet[str]]] + for scc in sccs: + deps = set() # type: Set[AbstractSet[str]] + for id in scc: + deps.update(sccsmap[x] for x in graph[id].dependencies if x in graph) + data[frozenset(scc)] = deps + res = [] + for ready in topsort(data): + # Sort the sets in ready by reversed smallest State.order. Examples: + # + # - If ready is [{x}, {y}], x.order == 1, y.order == 2, we get + # [{y}, {x}]. + # + # - If ready is [{a, b}, {c, d}], a.order == 1, b.order == 3, + # c.order == 2, d.order == 4, the sort keys become [1, 2] + # and the result is [{c, d}, {a, b}]. + res.extend(sorted(ready, + key=lambda scc: -min(graph[id].order for id in scc))) + return res + + +def strongly_connected_components(vertices: Set[str], + edges: Dict[str, List[str]]) -> Iterator[Set[str]]: + """Compute Strongly Connected Components of a directed graph. + + Args: + vertices: the labels for the vertices + edges: for each vertex, gives the target vertices of its outgoing edges + + Returns: + An iterator yielding strongly connected components, each + represented as a set of vertices. Each input vertex will occur + exactly once; vertices not part of a SCC are returned as + singleton sets. + + From http://code.activestate.com/recipes/578507/. 
+ """ + identified = set() # type: Set[str] + stack = [] # type: List[str] + index = {} # type: Dict[str, int] + boundaries = [] # type: List[int] + + def dfs(v: str) -> Iterator[Set[str]]: + index[v] = len(stack) + stack.append(v) + boundaries.append(index[v]) + + for w in edges[v]: + if w not in index: + # For Python >= 3.3, replace with "yield from dfs(w)" + for scc in dfs(w): + yield scc + elif w not in identified: + while index[w] < boundaries[-1]: + boundaries.pop() + + if boundaries[-1] == index[v]: + boundaries.pop() + scc = set(stack[index[v]:]) + del stack[index[v]:] + identified.update(scc) + yield scc + + for v in vertices: + if v not in index: + # For Python >= 3.3, replace with "yield from dfs(v)" + for scc in dfs(v): + yield scc + + +def topsort(data: Dict[AbstractSet[str], + Set[AbstractSet[str]]]) -> Iterable[Set[AbstractSet[str]]]: + """Topological sort. + + Args: + data: A map from SCCs (represented as frozen sets of strings) to + sets of SCCs, its dependencies. NOTE: This data structure + is modified in place -- for normalization purposes, + self-dependencies are removed and entries representing + orphans are added. + + Returns: + An iterator yielding sets of SCCs that have an equivalent + ordering. NOTE: The algorithm doesn't care about the internal + structure of SCCs. + + Example: + Suppose the input has the following structure: + + {A: {B, C}, B: {D}, C: {D}} + + This is normalized to: + + {A: {B, C}, B: {D}, C: {D}, D: {}} + + The algorithm will yield the following values: + + {D} + {B, C} + {A} + + From http://code.activestate.com/recipes/577413/. + """ + # TODO: Use a faster algorithm? + for k, v in data.items(): + v.discard(k) # Ignore self dependencies. + for item in set.union(*data.values()) - set(data.keys()): + data[item] = set() + while True: + ready = {item for item, dep in data.items() if not dep} + if not ready: + break + yield ready + data = {item: (dep - ready) + for item, dep in data.items() + if item not in ready} + assert not data, "A cyclic dependency exists amongst %r" % data diff --git a/mypy/fixup.py b/mypy/fixup.py new file mode 100644 index 000000000000..213c957283f3 --- /dev/null +++ b/mypy/fixup.py @@ -0,0 +1,273 @@ +"""Fix up various things after deserialization.""" + +from typing import Any, Dict, Optional, cast + +from mypy.nodes import (MypyFile, SymbolNode, SymbolTable, SymbolTableNode, + TypeInfo, FuncDef, OverloadedFuncDef, Decorator, Var, + TypeVarExpr, ClassDef, + LDEF, MDEF, GDEF, MODULE_REF) +from mypy.types import (CallableType, EllipsisType, Instance, Overloaded, TupleType, + TypeList, TypeVarType, UnboundType, UnionType, TypeVisitor) +from mypy.visitor import NodeVisitor + + +def fixup_module_pass_one(tree: MypyFile, modules: Dict[str, MypyFile]) -> None: + node_fixer = NodeFixer(modules) + node_fixer.visit_symbol_table(tree.names) + + +def fixup_module_pass_two(tree: MypyFile, modules: Dict[str, MypyFile]) -> None: + compute_all_mros(tree.names, modules) + + +def compute_all_mros(symtab: SymbolTable, modules: Dict[str, MypyFile]) -> None: + for key, value in symtab.items(): + if value.kind in (LDEF, MDEF, GDEF) and isinstance(value.node, TypeInfo): + info = value.node + info.calculate_mro() + assert info.mro, "No MRO calculated for %s" % (info.fullname(),) + compute_all_mros(info.names, modules) + + +# TODO: Fix up .info when deserializing, i.e. much earlier. 
+class NodeFixer(NodeVisitor[None]): + current_info = None # type: Optional[TypeInfo] + + def __init__(self, modules: Dict[str, MypyFile], type_fixer: 'TypeFixer' = None) -> None: + self.modules = modules + if type_fixer is None: + type_fixer = TypeFixer(self.modules) + self.type_fixer = type_fixer + + # NOTE: This method isn't (yet) part of the NodeVisitor API. + def visit_type_info(self, info: TypeInfo) -> None: + save_info = self.current_info + try: + self.current_info = info + if info.defn: + info.defn.accept(self) + if info.names: + self.visit_symbol_table(info.names) + if info.subtypes: + for st in info.subtypes: + self.visit_type_info(st) + if info.bases: + for base in info.bases: + base.accept(self.type_fixer) + if info._promote: + info._promote.accept(self.type_fixer) + if info.tuple_type: + info.tuple_type.accept(self.type_fixer) + finally: + self.current_info = save_info + + # NOTE: This method *definitely* isn't part of the NodeVisitor API. + def visit_symbol_table(self, symtab: SymbolTable) -> None: + # Copy the items because we may mutate symtab. + for key, value in list(symtab.items()): + cross_ref = value.cross_ref + if cross_ref is not None: # Fix up cross-reference. + del value.cross_ref + if cross_ref in self.modules: + value.node = self.modules[cross_ref] + else: + stnode = lookup_qualified_stnode(self.modules, cross_ref) + assert stnode is not None, "Could not find cross-ref %s" % (cross_ref,) + value.node = stnode.node + value.type_override = stnode.type_override + else: + if isinstance(value.node, TypeInfo): + # TypeInfo has no accept(). TODO: Add it? + self.visit_type_info(value.node) + elif value.node is not None: + value.node.accept(self) + if value.type_override is not None: + value.type_override.accept(self.type_fixer) + + def visit_func_def(self, func: FuncDef) -> None: + if self.current_info is not None: + func.info = self.current_info + if func.type is not None: + func.type.accept(self.type_fixer) + for arg in func.arguments: + if arg.type_annotation is not None: + arg.type_annotation.accept(self.type_fixer) + + def visit_overloaded_func_def(self, o: OverloadedFuncDef) -> None: + if self.current_info is not None: + o.info = self.current_info + if o.type: + o.type.accept(self.type_fixer) + for item in o.items: + item.accept(self) + + def visit_decorator(self, d: Decorator) -> None: + if self.current_info is not None: + d.var.info = self.current_info + if d.func: + d.func.accept(self) + if d.var: + d.var.accept(self) + for node in d.decorators: + node.accept(self) + + def visit_class_def(self, c: ClassDef) -> None: + for v in c.type_vars: + for value in v.values: + value.accept(self.type_fixer) + + def visit_type_var_expr(self, tv: TypeVarExpr) -> None: + for value in tv.values: + value.accept(self.type_fixer) + + def visit_var(self, v: Var) -> None: + if self.current_info is not None: + v.info = self.current_info + if v.type is not None: + v.type.accept(self.type_fixer) + + +class TypeFixer(TypeVisitor[None]): + def __init__(self, modules: Dict[str, MypyFile]) -> None: + self.modules = modules + + def visit_instance(self, inst: Instance) -> None: + # TODO: Combine Instances that are exactly the same? + type_ref = inst.type_ref + if type_ref is None: + return # We've already been here. + del inst.type_ref + node = lookup_qualified(self.modules, type_ref) + if isinstance(node, TypeInfo): + inst.type = node + # TODO: Is this needed or redundant? + # Also fix up the bases, just in case. 
+ for base in inst.type.bases: + if base.type is None: + base.accept(self) + for a in inst.args: + a.accept(self) + + def visit_any(self, o: Any) -> None: + pass # Nothing to descend into. + + def visit_callable_type(self, ct: CallableType) -> None: + if ct.fallback: + ct.fallback.accept(self) + for argt in ct.arg_types: + # argt may be None, e.g. for __self in NamedTuple constructors. + if argt is not None: + argt.accept(self) + if ct.ret_type is not None: + ct.ret_type.accept(self) + for v in ct.variables: + if v.values: + for val in v.values: + val.accept(self) + v.upper_bound.accept(self) + for i, t in ct.bound_vars: + t.accept(self) + + def visit_ellipsis_type(self, e: EllipsisType) -> None: + pass # Nothing to descend into. + + def visit_overloaded(self, t: Overloaded) -> None: + for ct in t.items(): + ct.accept(self) + + def visit_deleted_type(self, o: Any) -> None: + pass # Nothing to descend into. + + def visit_none_type(self, o: Any) -> None: + pass # Nothing to descend into. + + def visit_partial_type(self, o: Any) -> None: + raise RuntimeError("Shouldn't get here", o) + + def visit_tuple_type(self, tt: TupleType) -> None: + if tt.items: + for it in tt.items: + it.accept(self) + if tt.fallback is not None: + tt.fallback.accept(self) + + def visit_type_list(self, tl: TypeList) -> None: + for t in tl.items: + t.accept(self) + + def visit_type_var(self, tvt: TypeVarType) -> None: + if tvt.values: + for vt in tvt.values: + vt.accept(self) + if tvt.upper_bound is not None: + tvt.upper_bound.accept(self) + + def visit_unbound_type(self, o: UnboundType) -> None: + for a in o.args: + a.accept(self) + + def visit_union_type(self, ut: UnionType) -> None: + if ut.items: + for it in ut.items: + it.accept(self) + + def visit_void(self, o: Any) -> None: + pass # Nothing to descend into. + + +def lookup_qualified(modules: Dict[str, MypyFile], name: str) -> SymbolNode: + stnode = lookup_qualified_stnode(modules, name) + if stnode is None: + return None + else: + return stnode.node + + +def lookup_qualified_stnode(modules: Dict[str, MypyFile], name: str) -> SymbolTableNode: + head = name + rest = [] + while True: + head, tail = head.rsplit('.', 1) + mod = modules.get(head) + if mod is not None: + rest.append(tail) + break + names = mod.names + while True: + assert rest, "Cannot find %s" % (name,) + key = rest.pop() + assert key in names, "Cannot find %s for %s" % (key, name) + stnode = names[key] + if not rest: + return stnode + node = stnode.node + assert isinstance(node, TypeInfo) + names = cast(TypeInfo, node).names + + +def store_qualified(modules: Dict[str, MypyFile], name: str, info: SymbolNode) -> None: + head = name + rest = [] + while True: + head, tail = head.rsplit('.', 1) + mod = modules.get(head) + if mod is not None: + rest.append(tail) + break + names = mod.names + while True: + assert rest, "Cannot find %s" % (name,) + key = rest.pop() + if key not in names: + assert not rest, "Cannot find %s for %s" % (key, name) + # Store it. + # TODO: kind might be something else? 
+ names[key] = SymbolTableNode(GDEF, info) + return + stnode = names[key] + node = stnode.node + if not rest: + stnode.node = info + return + assert isinstance(node, TypeInfo) + names = cast(TypeInfo, node).names diff --git a/mypy/main.py b/mypy/main.py index 2ca3b6f95d05..d8b18b1b617b 100644 --- a/mypy/main.py +++ b/mypy/main.py @@ -10,7 +10,7 @@ from mypy import build from mypy import defaults from mypy import git -from mypy.build import BuildSource, PYTHON_EXTENSIONS +from mypy.build import BuildSource, BuildResult, PYTHON_EXTENSIONS from mypy.errors import CompileError, set_drop_into_pdb from mypy.version import __version__ @@ -46,14 +46,19 @@ def main(script_path: str) -> None: set_drop_into_pdb(True) if not options.dirty_stubs: git.verify_git_integrity_or_abort(build.default_data_dir(bin_dir)) + f = sys.stdout try: if options.target == build.TYPE_CHECK: - type_check_only(sources, bin_dir, options) + res = type_check_only(sources, bin_dir, options) + a = res.errors else: raise RuntimeError('unsupported target %d' % options.target) except CompileError as e: - f = sys.stdout if e.use_stdout else sys.stderr - for m in e.messages: + a = e.messages + if not e.use_stdout: + f = sys.stderr + if a: + for m in a: f.write(m + '\n') sys.exit(1) @@ -83,16 +88,16 @@ def readlinkabs(link: str) -> str: def type_check_only(sources: List[BuildSource], - bin_dir: str, options: Options) -> None: + bin_dir: str, options: Options) -> BuildResult: # Type-check the program and dependencies and translate to Python. - build.build(sources=sources, - target=build.TYPE_CHECK, - bin_dir=bin_dir, - pyversion=options.pyversion, - custom_typing_module=options.custom_typing_module, - report_dirs=options.report_dirs, - flags=options.build_flags, - python_path=options.python_path) + return build.build(sources=sources, + target=build.TYPE_CHECK, + bin_dir=bin_dir, + pyversion=options.pyversion, + custom_typing_module=options.custom_typing_module, + report_dirs=options.report_dirs, + flags=options.build_flags, + python_path=options.python_path) FOOTER = """environment variables: @@ -137,6 +142,8 @@ def parse_version(v): help="type check the interior of functions without type annotations") parser.add_argument('--fast-parser', action='store_true', help="enable experimental fast parser") + parser.add_argument('-i', '--incremental', action='store_true', + help="enable experimental module cache") parser.add_argument('-f', '--dirty-stubs', action='store_true', help="don't warn if typeshed is out of sync") parser.add_argument('--pdb', action='store_true', help="invoke pdb on fatal error") @@ -216,6 +223,8 @@ def parse_version(v): # experimental if args.fast_parser: options.build_flags.append(build.FAST_PARSER) + if args.incremental: + options.build_flags.append(build.INCREMENTAL) # Set reports. 
for flag, val in vars(args).items(): diff --git a/mypy/myunit/__init__.py b/mypy/myunit/__init__.py index 714c048bbc22..de3c2a581448 100644 --- a/mypy/myunit/__init__.py +++ b/mypy/myunit/__init__.py @@ -19,7 +19,7 @@ class AssertionFailure(Exception): - """Exception used to signal skipped test cases.""" + """Exception used to signal failed test cases.""" def __init__(self, s: str = None) -> None: if s: super().__init__(s) @@ -27,7 +27,9 @@ def __init__(self, s: str = None) -> None: super().__init__() -class SkipTestCaseException(Exception): pass +class SkipTestCaseException(Exception): + """Exception used to signal skipped test cases.""" + pass def assert_true(b: bool, msg: str = None) -> None: diff --git a/mypy/nodes.py b/mypy/nodes.py index 0a1a4b4da8c6..83739cd8976a 100644 --- a/mypy/nodes.py +++ b/mypy/nodes.py @@ -27,6 +27,8 @@ def get_line(self) -> int: pass T = TypeVar('T') +JsonDict = Dict[str, Any] + # Symbol table node kinds # @@ -37,7 +39,7 @@ def get_line(self) -> int: pass MDEF = 2 # type: int MODULE_REF = 3 # type: int # Type variable declared using TypeVar(...) has kind UNBOUND_TVAR. It's not -# valid as a type. A type variable is valid as a type (kind TVAR) within +# valid as a type. A type variable is valid as a type (kind BOUND_TVAR) within # (1) a generic class that uses the type variable as a type argument or # (2) a generic function that refers to the type variable in its signature. UNBOUND_TVAR = 4 # type: int @@ -46,6 +48,7 @@ def get_line(self) -> int: pass # Placeholder for a name imported via 'from ... import'. Second phase of # semantic will replace this the actual imported reference. This is # needed so that we can detect whether a name has been imported during +# XXX what? UNBOUND_IMPORTED = 7 # type: int @@ -63,6 +66,7 @@ def get_line(self) -> int: pass TYPE_ALIAS: 'TypeAlias', UNBOUND_IMPORTED: 'UnboundImported', } +inverse_node_kinds = {_kind: _name for _name, _kind in node_kinds.items()} implicit_module_attrs = {'__name__': '__builtins__.str', @@ -109,6 +113,21 @@ def get_line(self) -> int: def accept(self, visitor: NodeVisitor[T]) -> T: raise RuntimeError('Not implemented') + # NOTE: Can't use @abstractmethod, since many subclasses of Node + # don't implement serialize(). + def serialize(self) -> Any: + raise NotImplementedError('Cannot serialize {} instance'.format(self.__class__.__name__)) + + @classmethod + def deserialize(cls, data: JsonDict) -> 'Node': + classname = data['.class'] + glo = globals() + if classname in glo: + cl = glo[classname] + if issubclass(cl, cls) and 'deserialize' in cl.__dict__: + return cl.deserialize(data) + raise NotImplementedError('unexpected .class {}'.format(classname)) + class SymbolNode(Node): # Nodes that can be stored in a symbol table. 
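The nodes.py changes that follow all build on the '.class' convention introduced here: every serialize() emits a JSON-friendly dict tagged with the class name, and deserialize() dispatches on that tag. Below is a stripped-down, self-contained sketch of the same pattern; the Leaf and Pair classes are invented purely for illustration.

    import json
    from typing import Any, Dict

    JsonDict = Dict[str, Any]


    class Base:
        # Mirrors Node.deserialize(): look the '.class' tag up among the
        # known classes and delegate to that class's own deserialize().
        @classmethod
        def deserialize(cls, data: JsonDict) -> 'Base':
            classname = data['.class']
            glo = globals()
            if classname in glo and 'deserialize' in glo[classname].__dict__:
                return glo[classname].deserialize(data)
            raise NotImplementedError('unexpected .class {}'.format(classname))


    class Leaf(Base):
        def __init__(self, name: str) -> None:
            self.name = name

        def serialize(self) -> JsonDict:
            return {'.class': 'Leaf', 'name': self.name}

        @classmethod
        def deserialize(cls, data: JsonDict) -> 'Leaf':
            assert data['.class'] == 'Leaf'
            return Leaf(data['name'])


    class Pair(Base):
        def __init__(self, left: Base, right: Base) -> None:
            self.left, self.right = left, right

        def serialize(self) -> JsonDict:
            return {'.class': 'Pair',
                    'left': self.left.serialize(),
                    'right': self.right.serialize()}

        @classmethod
        def deserialize(cls, data: JsonDict) -> 'Pair':
            assert data['.class'] == 'Pair'
            return Pair(Base.deserialize(data['left']),
                        Base.deserialize(data['right']))


    # Round trip through JSON, then rebuild the tree from the tags alone.
    blob = json.dumps(Pair(Leaf('a'), Leaf('b')).serialize())
    tree = Base.deserialize(json.loads(blob))
    assert isinstance(tree, Pair) and isinstance(tree.left, Leaf)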
@@ -121,6 +140,10 @@ def name(self) -> str: pass @abstractmethod def fullname(self) -> str: pass + @classmethod + def deserialize(cls, data: JsonDict) -> 'SymbolNode': + return cast(SymbolNode, super().deserialize(data)) + class MypyFile(SymbolNode): """The abstract syntax tree of a single source file.""" @@ -174,6 +197,26 @@ def is_package_init_file(self) -> bool: return not (self.path is None) and len(self.path) != 0 \ and os.path.basename(self.path).startswith('__init__.') + def serialize(self) -> JsonDict: + return {'.class': 'MypyFile', + '_name': self._name, + '_fullname': self._fullname, + 'names': self.names.serialize(self._fullname), + 'is_stub': self.is_stub, + 'path': self.path, + } + + @classmethod + def deserialize(cls, data: JsonDict) -> 'MypyFile': + assert data['.class'] == 'MypyFile', data + tree = MypyFile([], []) + tree._name = data['_name'] + tree._fullname = data['_fullname'] + tree.names = SymbolTable.deserialize(data['names']) + tree.is_stub = data['is_stub'] + tree.path = data['path'] + return tree + class ImportBase(Node): """Base class for all import statements.""" @@ -270,6 +313,25 @@ def name(self) -> str: def accept(self, visitor: NodeVisitor[T]) -> T: return visitor.visit_overloaded_func_def(self) + def serialize(self) -> JsonDict: + return {'.class': 'OverloadedFuncDef', + 'items': [i.serialize() for i in self.items], + 'type': None if self.type is None else self.type.serialize(), + 'fullname': self._fullname, + 'is_property': self.is_property, + } + + @classmethod + def deserialize(cls, data: JsonDict) -> 'OverloadedFuncDef': + assert data['.class'] == 'OverloadedFuncDef' + res = OverloadedFuncDef([Decorator.deserialize(d) for d in data['items']]) + if data.get('type') is not None: + res.type = mypy.types.Type.deserialize(data['type']) + res._fullname = data['fullname'] + res.is_property = data['is_property'] + # NOTE: res.info will be set in the fixup phase. + return res + class Argument(Node): """A single argument in a FuncItem.""" @@ -311,6 +373,25 @@ def set_line(self, target: Union[Token, Node, int]) -> Node: self.initialization_statement.set_line(self.line) self.initialization_statement.lvalues[0].set_line(self.line) + def serialize(self) -> JsonDict: + data = {'.class': 'Argument', + 'kind': self.kind, + 'variable': self.variable.serialize(), + 'type_annotation': (None if self.type_annotation is None + else self.type_annotation.serialize()), + } # type: JsonDict + # TODO: initializer? + return data + + @classmethod + def deserialize(cls, data: JsonDict) -> 'Argument': + assert data['.class'] == 'Argument' + return Argument(Var.deserialize(data['variable']), + (None if data.get('type_annotation') is None + else mypy.types.Type.deserialize(data['type_annotation'])), + None, # TODO: initializer? + kind=data['kind']) + class FuncItem(FuncBase): arguments = [] # type: List[Argument] @@ -364,7 +445,7 @@ class FuncDef(FuncItem): is_conditional = False # Defined conditionally (within block)? 
is_abstract = False is_property = False - original_def = None # type: Union[FuncDef, Var] # Original conditional definition + original_def = None # type: Union[None, FuncDef, Var] # Original conditional definition def __init__(self, name: str, # Function name @@ -383,6 +464,44 @@ def accept(self, visitor: NodeVisitor[T]) -> T: def is_constructor(self) -> bool: return self.info is not None and self._name == '__init__' + def serialize(self) -> JsonDict: + return {'.class': 'FuncDef', + 'name': self._name, + 'fullname': self._fullname, + 'arguments': [a.serialize() for a in self.arguments], + 'type': None if self.type is None else self.type.serialize(), + 'is_property': self.is_property, + 'is_overload': self.is_overload, + 'is_generator': self.is_generator, + 'is_static': self.is_static, + 'is_class': self.is_class, + 'is_decorated': self.is_decorated, + 'is_conditional': self.is_conditional, + 'is_abstract': self.is_abstract, + # TODO: Do we need expanded, original_def? + } + + @classmethod + def deserialize(cls, data: JsonDict) -> 'FuncDef': + assert data['.class'] == 'FuncDef' + body = Block([]) + ret = FuncDef(data['name'], + [Argument.deserialize(a) for a in data['arguments']], + body, + (None if data['type'] is None + else mypy.types.FunctionLike.deserialize(data['type']))) + ret._fullname = data['fullname'] + ret.is_property = data['is_property'] + ret.is_overload = data['is_overload'] + ret.is_generator = data['is_generator'] + ret.is_static = data['is_static'] + ret.is_class = data['is_class'] + ret.is_decorated = data['is_decorated'] + ret.is_conditional = data['is_conditional'] + ret.is_abstract = data['is_abstract'] + # NOTE: ret.info is set in the fixup phase. + return ret + class Decorator(SymbolNode): """A decorated function. @@ -391,7 +510,7 @@ class Decorator(SymbolNode): """ func = None # type: FuncDef # Decorated function - decorators = None # type: List[Node] # Decorators, at least one + decorators = None # type: List[Node] # Decorators, at least one # XXX Not true var = None # type: Var # Represents the decorated function obj is_overload = False @@ -411,6 +530,22 @@ def fullname(self) -> str: def accept(self, visitor: NodeVisitor[T]) -> T: return visitor.visit_decorator(self) + def serialize(self) -> JsonDict: + return {'.class': 'Decorator', + 'func': self.func.serialize(), + 'var': self.var.serialize(), + 'is_overload': self.is_overload, + } + + @classmethod + def deserialize(cls, data: JsonDict) -> 'Decorator': + assert data['.class'] == 'Decorator' + dec = Decorator(FuncDef.deserialize(data['func']), + [], + Var.deserialize(data['var'])) + dec.is_overload = data['is_overload'] + return dec + class Var(SymbolNode): """A variable. @@ -448,6 +583,37 @@ def fullname(self) -> str: def accept(self, visitor: NodeVisitor[T]) -> T: return visitor.visit_var(self) + def serialize(self) -> JsonDict: + # TODO: Leave default values out? + # NOTE: Sometimes self.is_ready is False here, but we don't care. 
+ data = {'.class': 'Var', + 'name': self._name, + 'fullname': self._fullname, + 'type': None if self.type is None else self.type.serialize(), + 'is_self': self.is_self, + 'is_initialized_in_class': self.is_initialized_in_class, + 'is_staticmethod': self.is_staticmethod, + 'is_classmethod': self.is_classmethod, + 'is_property': self.is_property, + 'is_settable_property': self.is_settable_property, + } # type: JsonDict + return data + + @classmethod + def deserialize(cls, data: JsonDict) -> 'Var': + assert data['.class'] == 'Var' + name = data['name'] + type = None if data['type'] is None else mypy.types.Type.deserialize(data['type']) + v = Var(name, type) + v._fullname = data['fullname'] + v.is_self = data['is_self'] + v.is_initialized_in_class = data['is_initialized_in_class'] + v.is_staticmethod = data['is_staticmethod'] + v.is_classmethod = data['is_classmethod'] + v.is_property = data['is_property'] + v.is_settable_property = data['is_settable_property'] + return v + class ClassDef(Node): """Class definition""" @@ -466,16 +632,16 @@ class ClassDef(Node): # Built-in/extension class? (single implementation inheritance only) is_builtinclass = False - def __init__(self, name: str, defs: 'Block', + def __init__(self, + name: str, + defs: 'Block', type_vars: List['mypy.types.TypeVarDef'] = None, base_type_exprs: List[Node] = None, metaclass: str = None) -> None: - if not base_type_exprs: - base_type_exprs = [] self.name = name self.defs = defs self.type_vars = type_vars or [] - self.base_type_exprs = base_type_exprs + self.base_type_exprs = base_type_exprs or [] self.base_types = [] # Not yet semantically analyzed --> don't know base types self.metaclass = metaclass self.decorators = [] @@ -486,6 +652,30 @@ def accept(self, visitor: NodeVisitor[T]) -> T: def is_generic(self) -> bool: return self.info.is_generic() + def serialize(self) -> JsonDict: + # Not serialized: defs, base_type_exprs, decorators + return {'.class': 'ClassDef', + 'name': self.name, + 'fullname': self.fullname, + 'type_vars': [v.serialize() for v in self.type_vars], + 'base_types': [t.serialize() for t in self.base_types], + 'metaclass': self.metaclass, + 'is_builtinclass': self.is_builtinclass, + } + + @classmethod + def deserialize(self, data: JsonDict) -> 'ClassDef': + assert data['.class'] == 'ClassDef' + res = ClassDef(data['name'], + Block([]), + [mypy.types.TypeVarDef.deserialize(v) for v in data['type_vars']], + metaclass=data['metaclass'], + ) + res.fullname = data['fullname'] + res.base_types = [mypy.types.Instance.deserialize(t) for t in data['base_types']] + res.is_builtinclass = data['is_builtinclass'] + return res + class GlobalDecl(Node): """Declaration global x, y, ...""" @@ -895,12 +1085,32 @@ def __init__(self, name: str) -> None: self.name = name self.literal_hash = ('Var', name,) - def type_node(self): - return cast(TypeInfo, self.node) - def accept(self, visitor: NodeVisitor[T]) -> T: return visitor.visit_name_expr(self) + def serialize(self) -> JsonDict: + # TODO: Find out where and why NameExpr is being serialized (if at all). 
+ assert False, "Serializing NameExpr: %s" % (self,) + return {'.class': 'NameExpr', + 'kind': self.kind, + 'node': None if self.node is None else self.node.serialize(), + 'fullname': self.fullname, + 'is_def': self.is_def, + 'name': self.name, + 'literal': self.literal, + } + + @classmethod + def deserialize(cls, data: JsonDict) -> 'NameExpr': + assert data['.class'] == 'NameExpr' + ret = NameExpr(data['name']) + ret.kind = data['kind'] + ret.node = None if data['node'] is None else Node.deserialize(data['node']) + ret.fullname = data['fullname'] + ret.is_def = data['is_def'] + ret.literal = data['literal'] + return ret + class MemberExpr(RefExpr): """Member access expression x.y""" @@ -1399,6 +1609,22 @@ def fullname(self) -> str: def accept(self, visitor: NodeVisitor[T]) -> T: return visitor.visit_type_var_expr(self) + def serialize(self) -> JsonDict: + return {'.class': 'TypeVarExpr', + 'name': self._name, + 'fullname': self._fullname, + 'values': [t.serialize() for t in self.values], + 'variance': self.variance, + } + + @classmethod + def deserialize(cls, data: JsonDict) -> 'TypeVarExpr': + assert data['.class'] == 'TypeVarExpr' + return TypeVarExpr(data['name'], + data['fullname'], + [mypy.types.Type.deserialize(v) for v in data['values']], + data['variance']) + class TypeAliasExpr(Node): """Type alias expression (rvalue).""" @@ -1512,6 +1738,12 @@ class is generic then it will be a type constructor of higher kind. # Is this a named tuple type? is_named_tuple = False + # Is this a dummy from deserialization? + is_dummy = False + + # Alternative to fullname() for 'anonymous' classes. + alt_fullname = None # type: Optional[str] + def __init__(self, names: 'SymbolTable', defn: ClassDef) -> None: """Initialize a TypeInfo.""" self.names = names @@ -1606,16 +1838,19 @@ def calculate_mro(self) -> None: Raise MroError if cannot determine mro. """ - self.mro = linearize_hierarchy(self) + mro = linearize_hierarchy(self) + assert mro, "Could not produce a MRO at all for %s" % (self,) + self.mro = mro def has_base(self, fullname: str) -> bool: """Return True if type has a base type with the specified name. This can be either via extension or via implementation. """ - for cls in self.mro: - if cls.fullname() == fullname: - return True + if self.mro: + for cls in self.mro: + if cls.fullname() == fullname: + return True return False def all_subtypes(self) -> 'Set[TypeInfo]': @@ -1651,6 +1886,46 @@ def __str__(self) -> str: ('Names', sorted(self.names.keys()))], 'TypeInfo') + def serialize(self) -> Union[str, JsonDict]: + # NOTE: This is where all ClassDefs originate, so there shouldn't be duplicates. 
+ data = {'.class': 'TypeInfo', + 'fullname': self.fullname(), + 'alt_fullname': self.alt_fullname, + 'names': self.names.serialize(self.alt_fullname or self.fullname()), + 'defn': self.defn.serialize(), + 'is_abstract': self.is_abstract, + 'abstract_attributes': self.abstract_attributes, + 'is_enum': self.is_enum, + 'fallback_to_any': self.fallback_to_any, + 'type_vars': self.type_vars, + 'bases': [b.serialize() for b in self.bases], + '_promote': None if self._promote is None else self._promote.serialize(), + 'tuple_type': None if self.tuple_type is None else self.tuple_type.serialize(), + 'is_named_tuple': self.is_named_tuple, + } + return data + + @classmethod + def deserialize(cls, data: JsonDict) -> 'TypeInfo': + names = SymbolTable.deserialize(data['names']) + defn = ClassDef.deserialize(data['defn']) + ti = TypeInfo(names, defn) + ti._fullname = data['fullname'] + ti.alt_fullname = data['alt_fullname'] + # TODO: Is there a reason to reconstruct ti.subtypes? + ti.is_abstract = data['is_abstract'] + ti.abstract_attributes = data['abstract_attributes'] + ti.is_enum = data['is_enum'] + ti.fallback_to_any = data['fallback_to_any'] + ti.type_vars = data['type_vars'] + ti.bases = [mypy.types.Instance.deserialize(b) for b in data['bases']] + ti._promote = (None if data['_promote'] is None + else mypy.types.Type.deserialize(data['_promote'])) + ti.tuple_type = (None if data['tuple_type'] is None + else mypy.types.TupleType.deserialize(data['tuple_type'])) + ti.is_named_tuple = data['is_named_tuple'] + return ti + class SymbolTableNode: # Kind of node. Possible values: @@ -1671,12 +1946,15 @@ class SymbolTableNode: # Module id (e.g. "foo.bar") or None mod_id = '' # If this not None, override the type of the 'node' attribute. - type_override = None # type: mypy.types.Type + type_override = None # type: Optional[mypy.types.Type] # If False, this name won't be imported via 'from import *'. # This has no effect on names within classes. module_public = True + # For deserialized MODULE_REF nodes, the referenced module name; + # for other nodes, optionally the name of the referenced object. + cross_ref = None # type: Optional[str] - def __init__(self, kind: int, node: SymbolNode, mod_id: str = None, + def __init__(self, kind: int, node: Optional[SymbolNode], mod_id: str = None, typ: 'mypy.types.Type' = None, tvar_id: int = 0, module_public: bool = True) -> None: self.kind = kind @@ -1716,6 +1994,65 @@ def __str__(self) -> str: s += ' : {}'.format(self.type) return s + def serialize(self, prefix: str, name: str) -> JsonDict: + """Serialize a SymbolTableNode. + + Args: + prefix: full name of the containing module or class; or None + name: name of this object relative to the containing object + """ + data = {'.class': 'SymbolTableNode', + 'kind': node_kinds[self.kind], + } # type: JsonDict + if self.tvar_id: + data['tvar_id'] = self.tvar_id + if not self.module_public: + data['module_public'] = False + if self.kind == MODULE_REF: + assert self.node is not None, "Missing module cross ref in %s for %s" % (prefix, name) + data['cross_ref'] = self.node.fullname() + else: + if self.node is not None: + if prefix is not None: + # Check whether this is an alias for another object. + # If the object's canonical full name differs from + # the full name computed from prefix and name, + # it's an alias, and we serialize it as a cross ref. + if isinstance(self.node, TypeInfo): + fullname = self.node.alt_fullname or self.node.fullname() + else: + fullname = self.node.fullname() + if (fullname is not None and '.' 
in fullname and + fullname != prefix + '.' + name): + data['cross_ref'] = fullname + return data + data['node'] = self.node.serialize() + if self.type_override is not None: + data['type_override'] = self.type_override.serialize() + return data + + @classmethod + def deserialize(cls, data: JsonDict) -> 'SymbolTableNode': + assert data['.class'] == 'SymbolTableNode' + kind = inverse_node_kinds[data['kind']] + if 'cross_ref' in data: + # This will be fixed up later. + stnode = SymbolTableNode(kind, None) + stnode.cross_ref = data['cross_ref'] + else: + node = None + if 'node' in data: + node = SymbolNode.deserialize(data['node']) + typ = None + if 'type_override' in data: + typ = mypy.types.Type.deserialize(data['type_override']) + stnode = SymbolTableNode(kind, node, typ=typ) + if 'tvar_id' in data: + stnode.tvar_id = data['tvar_id'] + if 'module_public' in data: + stnode.module_public = data['module_public'] + return stnode + class SymbolTable(Dict[str, SymbolTableNode]): def __str__(self) -> str: @@ -1734,10 +2071,26 @@ def __str__(self) -> str: a[-1] += ')' return '\n'.join(a) - -def clean_up(s: str) -> str: - # TODO remove - return re.sub('.*::', '', s) + def serialize(self, fullname: str) -> JsonDict: + data = {'.class': 'SymbolTable'} # type: JsonDict + for key, value in self.items(): + # Skip __builtins__: it's a reference to the builtins + # module that gets added to every module by + # SemanticAnalyzer.visit_file(), but it shouldn't be + # accessed by users of the module. + if key == '__builtins__': + continue + data[key] = value.serialize(fullname, key) + return data + + @classmethod + def deserialize(cls, data: JsonDict) -> 'SymbolTable': + assert data['.class'] == 'SymbolTable' + st = SymbolTable() + for key, value in data.items(): + if key != '.class': + st[key] = SymbolTableNode.deserialize(value) + return st def function_type(func: FuncBase, fallback: 'mypy.types.Instance') -> 'mypy.types.FunctionLike': @@ -1798,13 +2151,20 @@ class MroError(Exception): """Raised if a consistent mro cannot be determined for a class.""" -def linearize_hierarchy(info: TypeInfo) -> List[TypeInfo]: +def linearize_hierarchy(info: TypeInfo) -> Optional[List[TypeInfo]]: # TODO describe if info.mro: return info.mro bases = info.direct_base_classes() - return [info] + merge([linearize_hierarchy(base) for base in bases] + - [bases]) + lin_bases = [] + for base in bases: + assert base is not None, "Cannot linearize bases for %s %s" % (info.fullname(), bases) + more_bases = linearize_hierarchy(base) + if more_bases is None: + return None + lin_bases.append(more_bases) + lin_bases.append(bases) + return [info] + merge(lin_bases) def merge(seqs: List[List[TypeInfo]]) -> List[TypeInfo]: diff --git a/mypy/semanal.py b/mypy/semanal.py index 58e47f14ed94..21a2248d4372 100644 --- a/mypy/semanal.py +++ b/mypy/semanal.py @@ -1349,20 +1349,23 @@ def process_namedtuple_definition(self, s: AssignmentStmt) -> None: """Check if s defines a namedtuple; if yes, store the definition in symbol table.""" if len(s.lvalues) != 1 or not isinstance(s.lvalues[0], NameExpr): return - named_tuple = self.check_namedtuple(s.rvalue) + lvalue = cast(NameExpr, s.lvalues[0]) + name = lvalue.name + named_tuple = self.check_namedtuple(s.rvalue, name) if named_tuple is None: return # Yes, it's a valid namedtuple definition. Add it to the symbol table. 
- lvalue = cast(NameExpr, s.lvalues[0]) - name = lvalue.name node = self.lookup(name, s) node.kind = GDEF # TODO locally defined namedtuple # TODO call.analyzed node.node = named_tuple - def check_namedtuple(self, node: Node) -> TypeInfo: + def check_namedtuple(self, node: Node, var_name: str = None) -> TypeInfo: """Check if a call defines a namedtuple. + The optional var_name argument is the name of the variable to + which this is assigned, if any. + If it does, return the corresponding TypeInfo. Return None otherwise. If the definition is invalid but looks like a namedtuple, @@ -1382,8 +1385,13 @@ def check_namedtuple(self, node: Node) -> TypeInfo: # Error. Construct dummy return value. return self.build_namedtuple_typeinfo('namedtuple', [], []) else: + # Give it a unique name derived from the line number. name = cast(StrExpr, call.args[0]).value + if name != var_name: + name += '@' + str(call.line) info = self.build_namedtuple_typeinfo(name, items, types) + # Store it as a global just in case it would remain anonymous. + self.globals[name] = SymbolTableNode(GDEF, info, self.cur_mod_id) call.analyzed = NamedTupleExpr(info).set_line(call.line) return info @@ -1836,8 +1844,7 @@ def visit_member_expr(self, expr: MemberExpr) -> None: if isinstance(base, RefExpr) and cast(RefExpr, base).kind == MODULE_REF: file = cast(MypyFile, cast(RefExpr, base).node) - names = file.names - n = names.get(expr.name, None) + n = file.names.get(expr.name, None) if file is not None else None if n: n = self.normalize_type_alias(n, expr) if not n: @@ -1853,7 +1860,7 @@ def visit_member_expr(self, expr: MemberExpr) -> None: # one type checker run. If we reported errors here, # the build would terminate after semantic analysis # and we wouldn't be able to report any type errors. - full_name = '%s.%s' % (file.fullname(), expr.name) + full_name = '%s.%s' % (file.fullname() if file is not None else None, expr.name) if full_name in obsolete_name_mapping: self.fail("Module has no attribute %r (it's now called %r)" % ( expr.name, obsolete_name_mapping[full_name]), expr) @@ -2319,7 +2326,10 @@ def process_nested_classes(self, outer_def: ClassDef) -> None: for node in outer_def.defs.body: if isinstance(node, ClassDef): node.info = TypeInfo(SymbolTable(), node) - node.info._fullname = node.info.name() + if outer_def.fullname: + node.info._fullname = outer_def.fullname + '.' + node.info.name() + else: + node.info._fullname = node.info.name() symbol = SymbolTableNode(MDEF, node.info) outer_def.info.names[node.name] = symbol self.process_nested_classes(node) diff --git a/mypy/test/data/check-incremental.test b/mypy/test/data/check-incremental.test new file mode 100644 index 000000000000..c27bcd71e4a8 --- /dev/null +++ b/mypy/test/data/check-incremental.test @@ -0,0 +1,31 @@ +-- Checks for incremental mode (see testcheck.py). +-- Each test is run twice, once with a cold cache, once with a warm cache. +-- The first time it must pass. +-- Before it is run the second time, any *.py.next files are copied to *.py. +-- The second time it must produce the errors given in the [out] section, if any. 
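The cold-run/warm-run protocol described above can also be exercised by hand. The following is only a rough sketch, assuming the patched build module and a working typeshed; the tmp/m.py path and module name m are made up, and cleanup is omitted.

    import os

    from mypy import build
    from mypy.build import BuildSource
    from mypy.errors import CompileError


    def run_incremental():
        # One incremental type-check pass over tmp/m.py; blocking errors
        # still raise CompileError, everything else lands on the result.
        try:
            res = build.build(sources=[BuildSource('tmp/m.py', 'm', None)],
                              target=build.TYPE_CHECK,
                              flags=[build.INCREMENTAL])
            return res.errors
        except CompileError as e:
            return e.messages


    os.makedirs('tmp', exist_ok=True)
    with open('tmp/m.py', 'w') as f:
        f.write('def foo() -> None:\n    pass\n')

    print(run_incremental())   # run 1, cold cache: expect []

    # Simulate an edit; this is what copying m.py.next over m.py does in the tests.
    with open('tmp/m.py', 'w') as f:
        f.write('def foo() -> None:\n    bar()\n')

    print(run_incremental())   # run 2, warm cache: m is stale, so it is
                               # re-checked and "Name 'bar' is not defined" shows up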
+ +[case testIncrementalEmpty] +[out] + +[case testIncrementalBasics] +import m +[file m.py] +def foo(): + pass +[file m.py.next] +def foo() -> None: + pass +[out] + +[case testIncrementalError] +import m +[file m.py] +def foo() -> None: + pass +[file m.py.next] +def foo() -> None: + bar() +[out] +main:1: note: In module imported here: +tmp/m.py: note: In function "foo": +tmp/m.py:2: error: Name 'bar' is not defined diff --git a/mypy/test/data/semanal-classes.test b/mypy/test/data/semanal-classes.test index 71af09a7af8f..5fc310077a00 100644 --- a/mypy/test/data/semanal-classes.test +++ b/mypy/test/data/semanal-classes.test @@ -329,7 +329,7 @@ MypyFile:1( B PassStmt:2()) ExpressionStmt:3( - NameExpr(B [m])))) + NameExpr(B [__main__.A.B])))) [case testClassWithBaseClassWithinClass] class A: @@ -345,7 +345,7 @@ MypyFile:1( ClassDef:3( C BaseType( - B) + __main__.A.B) PassStmt:3()))) [case testDeclarationReferenceToNestedClass] diff --git a/mypy/test/data/semanal-errors.test b/mypy/test/data/semanal-errors.test index 740bd55db57f..18fd154781c9 100644 --- a/mypy/test/data/semanal-errors.test +++ b/mypy/test/data/semanal-errors.test @@ -295,7 +295,7 @@ from . import m [out] main:1: error: No parent module -- cannot perform relative import -[case testRelativeImportAtTopLevelModule] +[case testRelativeImportAtTopLevelModule2] from .. import m [out] main:1: error: No parent module -- cannot perform relative import diff --git a/mypy/test/data/semanal-namedtuple.test b/mypy/test/data/semanal-namedtuple.test index de4968e67558..9ff9f6af6e8a 100644 --- a/mypy/test/data/semanal-namedtuple.test +++ b/mypy/test/data/semanal-namedtuple.test @@ -86,9 +86,9 @@ MypyFile:1( ClassDef:2( A TupleType( - Tuple[Any, fallback=__main__.N]) + Tuple[Any, fallback=__main__.N@2]) BaseType( - __main__.N) + __main__.N@2) PassStmt:2())) [case testNamedTupleBaseClassWithItemTypes] @@ -100,9 +100,9 @@ MypyFile:1( ClassDef:2( A TupleType( - Tuple[builtins.int, fallback=__main__.N]) + Tuple[builtins.int, fallback=__main__.N@2]) BaseType( - __main__.N) + __main__.N@2) PassStmt:2())) -- Errors diff --git a/mypy/test/helpers.py b/mypy/test/helpers.py index 38a49e1e0a20..6877fb46d02b 100644 --- a/mypy/test/helpers.py +++ b/mypy/test/helpers.py @@ -7,6 +7,7 @@ from mypy import defaults from mypy.myunit import AssertionFailure from mypy.test import config +from mypy.test.data import DataDrivenTestCase # AssertStringArraysEqual displays special line alignment helper messages if @@ -85,7 +86,7 @@ def assert_string_arrays_equal(expected: List[str], actual: List[str], raise AssertionFailure(msg) -def update_testcase_output(testcase, output, append): +def update_testcase_output(testcase: DataDrivenTestCase, output: List[str], append: str) -> None: testcase_path = os.path.join(testcase.old_cwd, testcase.file) newfile = testcase_path + append data_lines = open(testcase_path).read().splitlines() @@ -182,7 +183,7 @@ def assert_string_arrays_equal_wildcards(expected: List[str], assert_string_arrays_equal(expected, actual, msg) -def clean_up(a): +def clean_up(a: List[str]) -> List[str]: """Remove common directory prefix from all strings in a. This uses a naive string replace; it seems to work well enough. 
Also @@ -274,3 +275,12 @@ def testcase_pyversion(path: str, testcase_name: str) -> Tuple[int, int]: return defaults.PYTHON2_VERSION else: return testfile_pyversion(path) + + +def normalize_error_messages(messages: List[str]) -> List[str]: + """Translate an array of error messages to use / as path separator.""" + + a = [] + for m in messages: + a.append(m.replace(os.sep, '/')) + return a diff --git a/mypy/test/testcheck.py b/mypy/test/testcheck.py index 664822fd66aa..8f8c1c60b5cf 100644 --- a/mypy/test/testcheck.py +++ b/mypy/test/testcheck.py @@ -2,20 +2,21 @@ import os.path import re +import shutil import sys -from typing import Tuple, List +from typing import Tuple, List, Dict, Set from mypy import build import mypy.myunit # for mutable globals (ick!) from mypy.build import BuildSource -from mypy.myunit import Suite +from mypy.myunit import Suite, AssertionFailure from mypy.test.config import test_temp_dir, test_data_prefix -from mypy.test.data import parse_test_cases +from mypy.test.data import parse_test_cases, DataDrivenTestCase from mypy.test.helpers import ( - assert_string_arrays_equal, testcase_pyversion, update_testcase_output + assert_string_arrays_equal, normalize_error_messages, + testcase_pyversion, update_testcase_output, ) -from mypy.test.testsemanal import normalize_error_messages from mypy.errors import CompileError @@ -54,41 +55,127 @@ 'check-type-promotion.test', 'check-semanal-error.test', 'check-flags.test', + 'check-incremental.test', ] class TypeCheckSuite(Suite): - def cases(self): - c = [] + + def cases(self) -> List[DataDrivenTestCase]: + c = [] # type: List[DataDrivenTestCase] for f in files: c += parse_test_cases(os.path.join(test_data_prefix, f), self.run_test, test_temp_dir, True) return c - def run_test(self, testcase): - a = [] + def run_test(self, testcase: DataDrivenTestCase) -> None: + incremental = 'Incremental' in testcase.name.lower() or 'incremental' in testcase.file + if incremental: + # Incremental tests are run once with a cold cache, once with a warm cache. + # Expect success on first run, errors from testcase.output (if any) on second run. + self.clear_cache() + self.run_test_once(testcase, 1) + self.run_test_once(testcase, 2) + else: + self.run_test_once(testcase) + + def clear_cache(self) -> None: + dn = build.MYPY_CACHE + if os.path.exists(dn): + shutil.rmtree(dn) + + def run_test_once(self, testcase: DataDrivenTestCase, incremental=0) -> None: pyversion = testcase_pyversion(testcase.file, testcase.name) program_text = '\n'.join(testcase.input) module_name, program_name, program_text = self.parse_options(program_text) flags = self.parse_flags(program_text) + output = testcase.output + if incremental: + flags.append(build.INCREMENTAL) + if incremental == 1: + # In run 1, copy program text to program file. + output = [] + with open(program_name, 'w') as f: + f.write(program_text) + program_text = None + elif incremental == 2: + # In run 2, copy *.py.next files to *.py files. 
+ for dn, dirs, files in os.walk(os.curdir): + for file in files: + if file.endswith('.py.next'): + full = os.path.join(dn, file) + target = full[:-5] + shutil.copy(full, target) source = BuildSource(program_name, module_name, program_text) try: - build.build(target=build.TYPE_CHECK, - sources=[source], - pyversion=pyversion, - flags=flags + [build.TEST_BUILTINS], - alt_lib_path=test_temp_dir) + res = build.build(target=build.TYPE_CHECK, + sources=[source], + pyversion=pyversion, + flags=flags + [build.TEST_BUILTINS], + alt_lib_path=test_temp_dir) + a = res.errors except CompileError as e: - a = normalize_error_messages(e.messages) + res = None + a = e.messages + a = normalize_error_messages(a) - if testcase.output != a and mypy.myunit.UPDATE_TESTCASES: + if output != a and mypy.myunit.UPDATE_TESTCASES: update_testcase_output(testcase, a, mypy.myunit.APPEND_TESTCASES) assert_string_arrays_equal( - testcase.output, a, + output, a, 'Invalid type checker output ({}, line {})'.format( testcase.file, testcase.line)) + if incremental and res: + self.verify_cache(module_name, program_name, a, res.manager) + + def verify_cache(self, module_name: str, program_name: str, a: List[str], + manager: build.BuildManager) -> None: + # There should be valid cache metadata for each module except + # those in error_paths; for those there should not be. + # + # NOTE: When A imports B and there's an error in B, the cache + # data for B is invalidated, but the cache data for A remains. + # However build.process_graphs() will ignore A's cache data. + error_paths = self.find_error_paths(a) + modules = self.find_module_files() + modules.update({module_name: program_name}) + missing_paths = self.find_missing_cache_files(modules, manager) + if missing_paths != error_paths: + raise AssertionFailure("cache data discrepancy %s != %s" % + (missing_paths, error_paths)) + + def find_error_paths(self, a: List[str]) -> Set[str]: + hits = set() + for line in a: + m = re.match(r'([^\s:]+):\d+: error:', line) + if m: + hits.add(m.group(1)) + return hits + + def find_module_files(self) -> Dict[str, str]: + modules = {} + for dn, dirs, files in os.walk(test_temp_dir): + dnparts = dn.split(os.sep) + assert dnparts[0] == test_temp_dir + del dnparts[0] + for file in files: + if file.endswith('.py'): + base, ext = os.path.splitext(file) + id = '.'.join(dnparts + [base]) + modules[id] = os.path.join(dn, file) + return modules + + def find_missing_cache_files(self, modules: Dict[str, str], + manager: build.BuildManager) -> Set[str]: + missing = {} + for id, path in modules.items(): + meta = build.find_cache_meta(id, path, manager) + if meta is None: + missing[id] = path + return set(missing.values()) + def parse_options(self, program_text: str) -> Tuple[str, str, str]: """Return type check options for a test case. 
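As a companion to verify_cache() above, the same kind of check can be done by hand on a finished incremental build. This is only a sketch: it assumes find_cache_meta() keeps the (id, path, manager) signature used above and returns None whenever there is no usable metadata for a module.

    from typing import Set

    from mypy import build


    def modules_without_cache(res: build.BuildResult) -> Set[str]:
        """Module ids from a finished build that lack usable cache metadata.

        After a clean incremental run this should be empty; modules whose
        cache data was invalidated (for example because they had errors)
        show up here.
        """
        missing = set()  # type: Set[str]
        for id, tree in res.files.items():
            if build.find_cache_meta(id, tree.path, res.manager) is None:
                missing.add(id)
        return missing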
diff --git a/mypy/test/testgraph.py b/mypy/test/testgraph.py new file mode 100644 index 000000000000..6f9897660acf --- /dev/null +++ b/mypy/test/testgraph.py @@ -0,0 +1,49 @@ +"""Test cases for graph processing code in build.py.""" + +from typing import AbstractSet, Dict, Set + +from mypy.myunit import Suite, assert_equal +from mypy.build import BuildManager, State, TYPE_CHECK +from mypy.build import topsort, strongly_connected_components, sorted_components + + +class GraphSuite(Suite): + + def test_topsort(self) -> None: + a = frozenset({'A'}) + b = frozenset({'B'}) + c = frozenset({'C'}) + d = frozenset({'D'}) + data = {a: {b, c}, b: {d}, c: {d}} # type: Dict[AbstractSet[str], Set[AbstractSet[str]]] + res = list(topsort(data)) + assert_equal(res, [{d}, {b, c}, {a}]) + + def test_scc(self) -> None: + vertices = {'A', 'B', 'C', 'D'} + edges = {'A': ['B', 'C'], + 'B': ['C'], + 'C': ['B', 'D'], + 'D': []} # type: Dict[str, List[str]] + sccs = set(frozenset(x) for x in strongly_connected_components(vertices, edges)) + assert_equal(sccs, + {frozenset({'A'}), + frozenset({'B', 'C'}), + frozenset({'D'})}) + + def test_sorted_components(self) -> None: + manager = BuildManager( + data_dir='', + lib_path=[], + target=TYPE_CHECK, + pyversion=(3, 5), + flags=[], + ignore_prefix='', + custom_typing_module='', + source_set=None, + reports=None) + graph = {'a': State('a', None, 'import b, c', manager), + 'b': State('b', None, 'import c', manager), + 'c': State('c', None, 'import b, d', manager), + 'd': State('d', None, 'pass', manager)} + res = sorted_components(graph) + assert_equal(res, [frozenset({'d'}), frozenset({'c', 'b'}), frozenset({'a'})]) diff --git a/mypy/test/testsemanal.py b/mypy/test/testsemanal.py index 68da884655b0..a1d6a5835ac6 100644 --- a/mypy/test/testsemanal.py +++ b/mypy/test/testsemanal.py @@ -7,7 +7,9 @@ from mypy import build from mypy.build import BuildSource from mypy.myunit import Suite -from mypy.test.helpers import assert_string_arrays_equal, testfile_pyversion +from mypy.test.helpers import ( + assert_string_arrays_equal, normalize_error_messages, testfile_pyversion, +) from mypy.test.data import parse_test_cases from mypy.test.config import test_data_prefix, test_temp_dir from mypy.errors import CompileError @@ -52,7 +54,9 @@ def test_semanal(testcase): pyversion=testfile_pyversion(testcase.file), flags=[build.TEST_BUILTINS], alt_lib_path=test_temp_dir) - a = [] + a = result.errors + if a: + raise CompileError(a) # Include string representations of the source files in the actual # output. for fnam in sorted(result.files.keys()): @@ -96,28 +100,19 @@ def test_semanal_error(testcase): try: src = '\n'.join(testcase.input) - build.build(target=build.SEMANTIC_ANALYSIS, - sources=[BuildSource('main', None, src)], - flags=[build.TEST_BUILTINS], - alt_lib_path=test_temp_dir) - raise AssertionError('No errors reported in {}, line {}'.format( - testcase.file, testcase.line)) + res = build.build(target=build.SEMANTIC_ANALYSIS, + sources=[BuildSource('main', None, src)], + flags=[build.TEST_BUILTINS], + alt_lib_path=test_temp_dir) + a = res.errors + assert a, 'No errors reported in {}, line {}'.format(testcase.file, testcase.line) except CompileError as e: # Verify that there was a compile error and that the error messages # are equivalent. 
- assert_string_arrays_equal( - testcase.output, normalize_error_messages(e.messages), - 'Invalid compiler output ({}, line {})'.format(testcase.file, - testcase.line)) - - -def normalize_error_messages(messages): - """Translate an array of error messages to use / as path separator.""" - - a = [] - for m in messages: - a.append(m.replace(os.sep, '/')) - return a + a = e.messages + assert_string_arrays_equal( + testcase.output, normalize_error_messages(a), + 'Invalid compiler output ({}, line {})'.format(testcase.file, testcase.line)) # SymbolNode table export test cases @@ -144,7 +139,9 @@ def run_test(self, testcase): flags=[build.TEST_BUILTINS], alt_lib_path=test_temp_dir) # The output is the symbol table converted into a string. - a = [] + a = result.errors + if a: + raise CompileError(a) for f in sorted(result.files.keys()): if f not in ('builtins', 'typing', 'abc'): a.append('{}:'.format(f)) @@ -181,6 +178,9 @@ def run_test(self, testcase): sources=[BuildSource('main', None, src)], flags=[build.TEST_BUILTINS], alt_lib_path=test_temp_dir) + a = result.errors + if a: + raise CompileError(a) # Collect all TypeInfos in top-level modules. typeinfos = TypeInfoMap() diff --git a/mypy/test/testtransform.py b/mypy/test/testtransform.py index 51789b943395..1d9916ea2f5e 100644 --- a/mypy/test/testtransform.py +++ b/mypy/test/testtransform.py @@ -44,7 +44,9 @@ def test_transform(testcase): pyversion=testfile_pyversion(testcase.file), flags=[build.TEST_BUILTINS], alt_lib_path=test_temp_dir) - a = [] + a = result.errors + if a: + raise CompileError(a) # Include string representations of the source files in the actual # output. for fnam in sorted(result.files.keys()): diff --git a/mypy/test/testtypegen.py b/mypy/test/testtypegen.py index 29a89cedc307..0e4432bcb71b 100644 --- a/mypy/test/testtypegen.py +++ b/mypy/test/testtypegen.py @@ -29,7 +29,6 @@ def cases(self): return c def run_test(self, testcase): - a = [] try: line = testcase.input[0] mask = '' @@ -41,6 +40,7 @@ def run_test(self, testcase): sources=[BuildSource('main', None, src)], flags=[build.TEST_BUILTINS], alt_lib_path=config.test_temp_dir) + a = result.errors map = result.types nodes = map.keys() diff --git a/mypy/types.py b/mypy/types.py index 792a360209b4..ad0b23c20471 100644 --- a/mypy/types.py +++ b/mypy/types.py @@ -1,7 +1,7 @@ """Classes for representing mypy types.""" from abc import abstractmethod -from typing import Any, TypeVar, List, Tuple, cast, Generic, Set, Sequence, Optional +from typing import Any, TypeVar, Dict, List, Tuple, cast, Generic, Set, Sequence, Optional import mypy.nodes from mypy.nodes import INVARIANT, SymbolNode @@ -9,6 +9,8 @@ T = TypeVar('T') +JsonDict = Dict[str, Any] + class Type(mypy.nodes.Context): """Abstract base class for all types.""" @@ -27,18 +29,31 @@ def accept(self, visitor: 'TypeVisitor[T]') -> T: def __repr__(self) -> str: return self.accept(TypeStrVisitor()) + def serialize(self) -> JsonDict: + raise NotImplementedError('Cannot serialize {} instance'.format(self.__class__.__name__)) + + @classmethod + def deserialize(cls, data: JsonDict) -> 'Type': + classname = data['.class'] + glo = globals() + if classname in glo: + cl = glo[classname] + if 'deserialize' in cl.__dict__: + return cl.deserialize(data) + raise NotImplementedError('unexpected .class {}'.format(classname)) + class TypeVarDef(mypy.nodes.Context): """Definition of a single type variable.""" name = '' id = 0 - values = None # type: List[Type] + values = None # type: Optional[List[Type]] upper_bound = None # type: Type variance 
= INVARIANT # type: int line = 0 - def __init__(self, name: str, id: int, values: List[Type], + def __init__(self, name: str, id: int, values: Optional[List[Type]], upper_bound: Type, variance: int = INVARIANT, line: int = -1) -> None: self.name = name self.id = id @@ -56,6 +71,26 @@ def __repr__(self) -> str: else: return self.name + def serialize(self) -> JsonDict: + return {'.class': 'TypeVarDef', + 'name': self.name, + 'id': self.id, + 'values': None if self.values is None else [v.serialize() for v in self.values], + 'upper_bound': self.upper_bound.serialize(), + 'variance': self.variance, + } + + @classmethod + def deserialize(cls, data: JsonDict) -> 'TypeVarDef': + assert data['.class'] == 'TypeVarDef' + return TypeVarDef(data['name'], + data['id'], + None if data['values'] is None + else [Type.deserialize(v) for v in data['values']], + Type.deserialize(data['upper_bound']), + data['variance'], + ) + class UnboundType(Type): """Instance type that has not been bound during semantic analysis.""" @@ -73,6 +108,18 @@ def __init__(self, name: str, args: List[Type] = None, line: int = -1) -> None: def accept(self, visitor: 'TypeVisitor[T]') -> T: return visitor.visit_unbound_type(self) + def serialize(self) -> JsonDict: + return {'.class': 'UnboundType', + 'name': self.name, + 'args': [a.serialize() for a in self.args], + } + + @classmethod + def deserialize(self, data: JsonDict) -> 'UnboundType': + assert data['.class'] == 'UnboundType' + return UnboundType(data['name'], + [Type.deserialize(a) for a in data['args']]) + class ErrorType(Type): """The error type is used as the result of failed type operations.""" @@ -98,6 +145,16 @@ def __init__(self, items: List[Type], line: int = -1) -> None: def accept(self, visitor: 'TypeVisitor[T]') -> T: return visitor.visit_type_list(self) + def serialize(self) -> JsonDict: + return {'.class': 'TypeList', + 'items': [t.serialize() for t in self.items], + } + + @classmethod + def deserialize(self, data: JsonDict) -> 'TypeList': + assert data['.class'] == 'TypeList' + return TypeList([Type.deserialize(t) for t in data['items']]) + class AnyType(Type): """The type 'Any'.""" @@ -109,6 +166,14 @@ def __init__(self, implicit=False, line: int = -1) -> None: def accept(self, visitor: 'TypeVisitor[T]') -> T: return visitor.visit_any(self) + def serialize(self) -> JsonDict: + return {'.class': 'AnyType'} + + @classmethod + def deserialize(cls, data: JsonDict) -> 'AnyType': + assert data['.class'] == 'AnyType' + return AnyType() + class Void(Type): """The return type 'None'. @@ -129,6 +194,14 @@ def accept(self, visitor: 'TypeVisitor[T]') -> T: def with_source(self, source: str) -> 'Void': return Void(source, self.line) + def serialize(self) -> JsonDict: + return {'.class': 'Void'} + + @classmethod + def deserialize(cls, data: JsonDict) -> 'Void': + assert data['.class'] == 'Void' + return Void() + class NoneTyp(Type): """The type of 'None'. @@ -149,6 +222,14 @@ def __init__(self, line: int = -1) -> None: def accept(self, visitor: 'TypeVisitor[T]') -> T: return visitor.visit_none_type(self) + def serialize(self) -> JsonDict: + return {'.class': 'NoneTyp'} + + @classmethod + def deserialize(self, data: JsonDict) -> 'NoneTyp': + assert data['.class'] == 'NoneTyp' + return NoneTyp() + class ErasedType(Type): """Placeholder for an erased type. 
@@ -176,6 +257,15 @@ def __init__(self, source: str = None, line: int = -1) -> None: def accept(self, visitor: 'TypeVisitor[T]') -> T: return visitor.visit_deleted_type(self) + def serialize(self) -> JsonDict: + return {'.class': 'DeletedType', + 'source': self.source} + + @classmethod + def deserialize(self, data: JsonDict) -> 'DeletedType': + assert data['.class'] == 'DeletedType' + return DeletedType(data['source']) + class Instance(Type): """An instance type of form C[T1, ..., Tn]. @@ -197,6 +287,29 @@ def __init__(self, typ: mypy.nodes.TypeInfo, args: List[Type], def accept(self, visitor: 'TypeVisitor[T]') -> T: return visitor.visit_instance(self) + type_ref = None # type: str + + def serialize(self) -> JsonDict: + data = {'.class': 'Instance', + } # type: JsonDict + assert self.type is not None + data['type_ref'] = self.type.alt_fullname or self.type.fullname() + if self.args: + data['args'] = [arg.serialize() for arg in self.args] + return data + + @classmethod + def deserialize(cls, data: JsonDict) -> 'Instance': + assert data['.class'] == 'Instance' + args = [] # type: List[Type] + if 'args' in data: + args_list = data['args'] + assert isinstance(args_list, list) + args = [Type.deserialize(arg) for arg in args_list] + inst = Instance(None, args) + inst.type_ref = data['type_ref'] # Will be fixed up by fixup.py later. + return inst + class TypeVarType(Type): """A type variable type. @@ -224,6 +337,24 @@ def __init__(self, name: str, id: int, values: List[Type], upper_bound: Type, def accept(self, visitor: 'TypeVisitor[T]') -> T: return visitor.visit_type_var(self) + def serialize(self) -> JsonDict: + return {'.class': 'TypeVarType', + 'name': self.name, + 'id': self.id, + 'values': [v.serialize() for v in self.values], + 'upper_bound': self.upper_bound.serialize(), + 'variance': self.variance, + } + + @classmethod + def deserialize(cls, data: JsonDict) -> 'TypeVarType': + assert data['.class'] == 'TypeVarType' + return TypeVarType(data['name'], + data['id'], + [Type.deserialize(v) for v in data['values']], + Type.deserialize(data['upper_bound']), + data['variance']) + class FunctionLike(Type): """Abstract base class for function types.""" @@ -246,6 +377,10 @@ def with_name(self, name: str) -> 'FunctionLike': pass # Corresponding instance type (e.g. builtins.type) fallback = None # type: Instance + @classmethod + def deserialize(cls, data: JsonDict) -> 'FunctionLike': + return cast(FunctionLike, super().deserialize(data)) + _dummy = object() # type: Any @@ -256,9 +391,9 @@ class CallableType(FunctionLike): arg_types = None # type: List[Type] # Types of function arguments arg_kinds = None # type: List[int] # mypy.nodes.ARG_ constants arg_names = None # type: List[str] # None if not a keyword argument - min_args = 0 # Minimum number of arguments - is_var_arg = False # Is it a varargs function? - ret_type = None # type:Type # Return value type + min_args = 0 # Minimum number of arguments; derived from arg_kinds + is_var_arg = False # Is it a varargs function? derived from arg_kinds + ret_type = None # type: Type # Return value type name = '' # Name (may be None; for error messages) definition = None # type: SymbolNode # For error messages. May be None. # Type variables for a generic function @@ -286,7 +421,8 @@ class CallableType(FunctionLike): # Was this type implicitly generated instead of explicitly specified by the user? 
     implicit = False
 
-    def __init__(self, arg_types: List[Type],
+    def __init__(self,
+                 arg_types: List[Type],
                  arg_kinds: List[int],
                  arg_names: List[str],
                  ret_type: Type,
@@ -350,7 +486,7 @@ def copy_modified(self,
         )
 
     def is_type_obj(self) -> bool:
-        return self.fallback.type.fullname() == 'builtins.type'
+        return self.fallback.type is not None and self.fallback.type.fullname() == 'builtins.type'
 
     def is_concrete_type_obj(self) -> bool:
         return self.is_type_obj() and self.is_classmethod_class
@@ -390,6 +526,43 @@ def type_var_ids(self) -> List[int]:
             a.append(tv.id)
         return a
 
+    def serialize(self) -> JsonDict:
+        # TODO: As an optimization, leave out everything related to
+        # generic functions for non-generic functions.
+        return {'.class': 'CallableType',
+                'arg_types': [(None if t is None else t.serialize())
+                              for t in self.arg_types],
+                'arg_kinds': self.arg_kinds,
+                'arg_names': self.arg_names,
+                'ret_type': self.ret_type.serialize(),
+                'fallback': self.fallback.serialize(),
+                'name': self.name,
+                # We don't serialize the definition (only used for error messages).
+                'variables': [v.serialize() for v in self.variables],
+                'bound_vars': [[x, y.serialize()] for x, y in self.bound_vars],
+                'is_ellipsis_args': self.is_ellipsis_args,
+                'implicit': self.implicit,
+                'is_classmethod_class': self.is_classmethod_class,
+                }
+
+    @classmethod
+    def deserialize(cls, data: JsonDict) -> 'CallableType':
+        assert data['.class'] == 'CallableType'
+        # TODO: Set definition to the containing SymbolNode?
+        return CallableType([(None if t is None else Type.deserialize(t))
+                             for t in data['arg_types']],
+                            data['arg_kinds'],
+                            data['arg_names'],
+                            Type.deserialize(data['ret_type']),
+                            Instance.deserialize(data['fallback']),
+                            name=data['name'],
+                            variables=[TypeVarDef.deserialize(v) for v in data['variables']],
+                            bound_vars=[(x, Type.deserialize(y)) for x, y in data['bound_vars']],
+                            is_ellipsis_args=data['is_ellipsis_args'],
+                            implicit=data['implicit'],
+                            is_classmethod_class=data['is_classmethod_class'],
+                            )
+
 
 class Overloaded(FunctionLike):
     """Overloaded function type T1, ... Tn, where each Ti is CallableType.
@@ -432,6 +605,16 @@ def with_name(self, name: str) -> 'Overloaded':
     def accept(self, visitor: 'TypeVisitor[T]') -> T:
         return visitor.visit_overloaded(self)
 
+    def serialize(self) -> JsonDict:
+        return {'.class': 'Overloaded',
+                'items': [t.serialize() for t in self.items()],
+                }
+
+    @classmethod
+    def deserialize(cls, data: JsonDict) -> 'Overloaded':
+        assert data['.class'] == 'Overloaded'
+        return Overloaded([CallableType.deserialize(t) for t in data['items']])
+
 
 class TupleType(Type):
     """The tuple type Tuple[T1, ..., Tn] (at least one type argument).
@@ -461,6 +644,20 @@ def length(self) -> int:
     def accept(self, visitor: 'TypeVisitor[T]') -> T:
         return visitor.visit_tuple_type(self)
 
+    def serialize(self) -> JsonDict:
+        return {'.class': 'TupleType',
+                'items': [t.serialize() for t in self.items],
+                'fallback': self.fallback.serialize(),
+                'implicit': self.implicit,
+                }
+
+    @classmethod
+    def deserialize(cls, data: JsonDict) -> 'TupleType':
+        assert data['.class'] == 'TupleType'
+        return TupleType([Type.deserialize(t) for t in data['items']],
+                         Instance.deserialize(data['fallback']),
+                         implicit=data['implicit'])
+
 
 class StarType(Type):
     """The star type *type_parameter.
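The None checks added to is_type_obj() above, like the type_ref field on Instance earlier in this patch, exist because loading from the cache is a two-phase process: an Instance is first rebuilt with type=None plus a dotted type_ref name, and cross-references are resolved only after all symbol tables have been read back, in the patch's fixup.py pass. The helper below is hypothetical and only sketches the shape of that second phase under these assumptions; the real logic lives in mypy/fixup.py and is more involved (nested classes, missing modules, etc.):

    # Hypothetical illustration of the "Will be fixed up by fixup.py later" step.
    from typing import Dict

    import mypy.nodes
    from mypy.types import Instance

    def link_instance(inst: Instance, modules: Dict[str, mypy.nodes.MypyFile]) -> None:
        """Replace inst.type (currently None) using the dotted name in inst.type_ref."""
        module_name, _, class_name = inst.type_ref.rpartition('.')
        sym = modules[module_name].names[class_name]   # SymbolTableNode
        assert isinstance(sym.node, mypy.nodes.TypeInfo)
        inst.type = sym.node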
@@ -536,6 +733,16 @@ def has_readable_member(self, name: str) -> bool:
                    (isinstance(x, Instance) and cast(Instance, x).type.has_readable_member(name))
                    for x in self.items)
 
+    def serialize(self) -> JsonDict:
+        return {'.class': 'UnionType',
+                'items': [t.serialize() for t in self.items],
+                }
+
+    @classmethod
+    def deserialize(cls, data: JsonDict) -> 'UnionType':
+        assert data['.class'] == 'UnionType'
+        return UnionType([Type.deserialize(t) for t in data['items']])
+
 
 class PartialType(Type):
     """Type such as List[?] where type arguments are unknown, or partial None type.
@@ -575,6 +782,14 @@ class EllipsisType(Type):
     def accept(self, visitor: 'TypeVisitor[T]') -> T:
         return visitor.visit_ellipsis_type(self)
 
+    def serialize(self) -> JsonDict:
+        return {'.class': 'EllipsisType'}
+
+    @classmethod
+    def deserialize(cls, data: JsonDict) -> 'EllipsisType':
+        assert data['.class'] == 'EllipsisType'
+        return EllipsisType()
+
 
 #
 # Visitor-related classes
@@ -783,7 +998,7 @@ def visit_deleted_type(self, t):
             return "<Deleted '{}'>".format(t.source)
 
     def visit_instance(self, t):
-        s = t.type.fullname()
+        s = t.type.fullname() if t.type is not None else ''
         if t.erased:
             s += '*'
         if t.args != []:
@@ -842,7 +1057,7 @@ def visit_overloaded(self, t):
 
     def visit_tuple_type(self, t):
         s = self.list_str(t.items)
-        if t.fallback:
+        if t.fallback and t.fallback.type:
             fallback_name = t.fallback.type.fullname()
             if fallback_name != 'builtins.tuple':
                 return 'Tuple[{}, fallback={}]'.format(s, t.fallback.accept(self))
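The TypeStrVisitor guards above follow from the same two-phase loading: until the fixup pass has run, an Instance may have type=None (with only type_ref set), and a TupleType's fallback can be in the same half-built state, so formatting a type for an error message or a debug log must not assume the TypeInfo is present. A rough, illustrative-only sketch of the situation being guarded against, using this patch's API:

    # A freshly deserialized Instance has no TypeInfo yet, but repr() should
    # still succeed instead of raising AttributeError on t.type.fullname().
    from mypy.types import Instance

    inst = Instance(None, [])          # what Instance.deserialize() produces
    inst.type_ref = 'builtins.int'     # resolved only by the later fixup pass
    print(repr(inst))                  # empty name rather than a crash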