diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 09bfda1755e03..8ba0b8a276165 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -19,7 +19,7 @@ ci: skip: [pyright, mypy] repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.9.9 + rev: v0.11.4 hooks: - id: ruff args: [--exit-non-zero-on-fix] @@ -95,14 +95,14 @@ repos: - id: sphinx-lint args: ["--enable", "all", "--disable", "line-too-long"] - repo: https://github.com/pre-commit/mirrors-clang-format - rev: v19.1.7 + rev: v20.1.0 hooks: - id: clang-format files: ^pandas/_libs/src|^pandas/_libs/include args: [-i] types_or: [c, c++] - repo: https://github.com/trim21/pre-commit-mirror-meson - rev: v1.7.0 + rev: v1.7.2 hooks: - id: meson-fmt args: ['--inplace'] diff --git a/asv_bench/benchmarks/frame_methods.py b/asv_bench/benchmarks/frame_methods.py index 6a2ab24df26fe..cd7851acae3f2 100644 --- a/asv_bench/benchmarks/frame_methods.py +++ b/asv_bench/benchmarks/frame_methods.py @@ -517,7 +517,7 @@ def setup(self): self.df = DataFrame(np.random.randn(1000, 100)) self.s = Series(np.arange(1028.0)) - self.df2 = DataFrame({i: self.s for i in range(1028)}) + self.df2 = DataFrame(dict.fromkeys(range(1028), self.s)) self.df3 = DataFrame(np.random.randn(1000, 3), columns=list("ABC")) def time_apply_user_func(self): diff --git a/pandas/_config/config.py b/pandas/_config/config.py index ce53e05608ba7..3b22dbf43c25c 100644 --- a/pandas/_config/config.py +++ b/pandas/_config/config.py @@ -775,7 +775,7 @@ def inner(key: str, *args, **kwds): pkey = f"{prefix}.{key}" return func(pkey, *args, **kwds) - return cast(F, inner) + return cast("F", inner) _register_option = register_option _get_option = get_option diff --git a/pandas/_config/localization.py b/pandas/_config/localization.py index 6602633f20399..d3ac1150ee19b 100644 --- a/pandas/_config/localization.py +++ b/pandas/_config/localization.py @@ -156,7 +156,9 @@ def get_locales( out_locales = [] for x in split_raw_locales: try: - out_locales.append(str(x, encoding=cast(str, options.display.encoding))) + out_locales.append( + str(x, encoding=cast("str", options.display.encoding)) + ) except UnicodeError: # 'locale -a' is used to populated 'raw_locales' and on # Redhat 7 Linux (and maybe others) prints locale names diff --git a/pandas/_testing/_warnings.py b/pandas/_testing/_warnings.py index a752c8db90f38..fb66cd8db02ad 100644 --- a/pandas/_testing/_warnings.py +++ b/pandas/_testing/_warnings.py @@ -121,7 +121,7 @@ class for all warnings. To raise multiple types of exceptions, ) else: expected_warning = cast( - Union[type[Warning], tuple[type[Warning], ...]], + "Union[type[Warning], tuple[type[Warning], ...]]", expected_warning, ) match = ( @@ -241,7 +241,7 @@ def _is_unexpected_warning( """Check if the actual warning issued is unexpected.""" if actual_warning and not expected_warning: return True - expected_warning = cast(type[Warning], expected_warning) + expected_warning = cast("type[Warning]", expected_warning) return bool(not issubclass(actual_warning.category, expected_warning)) diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index daa5187cdb636..49b35545fa180 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -283,7 +283,7 @@ def _check_types(left, right, obj: str = "Index") -> None: # MultiIndex special comparison for little-friendly error messages if isinstance(left, MultiIndex): - right = cast(MultiIndex, right) + right = cast("MultiIndex", right) for level in range(left.nlevels): lobj = f"{obj} level [{level}]" @@ -776,11 +776,11 @@ def assert_extension_array_equal( # GH 52449 if not check_dtype and left.dtype.kind in "mM": if not isinstance(left.dtype, np.dtype): - l_unit = cast(DatetimeTZDtype, left.dtype).unit + l_unit = cast("DatetimeTZDtype", left.dtype).unit else: l_unit = np.datetime_data(left.dtype)[0] if not isinstance(right.dtype, np.dtype): - r_unit = cast(DatetimeTZDtype, right.dtype).unit + r_unit = cast("DatetimeTZDtype", right.dtype).unit else: r_unit = np.datetime_data(right.dtype)[0] if ( diff --git a/pandas/compat/numpy/function.py b/pandas/compat/numpy/function.py index abf86fc415641..3e99a4c3e13f0 100644 --- a/pandas/compat/numpy/function.py +++ b/pandas/compat/numpy/function.py @@ -169,7 +169,7 @@ def validate_argsort_with_ascending(ascending: bool | int | None, args, kwargs) ascending = True validate_argsort_kind(args, kwargs, max_fname_arg_count=3) - ascending = cast(bool, ascending) + ascending = cast("bool", ascending) return ascending diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 76f2fdad591ff..c9156c988bf15 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -172,12 +172,12 @@ def _ensure_data(values: ArrayLike) -> np.ndarray: return np.asarray(values) elif is_complex_dtype(values.dtype): - return cast(np.ndarray, values) + return cast("np.ndarray", values) # datetimelike elif needs_i8_conversion(values.dtype): npvalues = values.view("i8") - npvalues = cast(np.ndarray, npvalues) + npvalues = cast("np.ndarray", npvalues) return npvalues # we have failed, return object @@ -1289,9 +1289,9 @@ def searchsorted( if is_integer(value): # We know that value is int - value = cast(int, dtype.type(value)) + value = cast("int", dtype.type(value)) else: - value = pd_array(cast(ArrayLike, value), dtype=dtype) + value = pd_array(cast("ArrayLike", value), dtype=dtype) else: # E.g. if `arr` is an array with dtype='datetime64[ns]' # and `value` is a pd.Timestamp, we may need to convert value diff --git a/pandas/core/apply.py b/pandas/core/apply.py index da6124307e3f1..62832f25f4d03 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -322,19 +322,19 @@ def transform(self) -> DataFrame | Series: return obj.T.transform(func, 0, *args, **kwargs).T if is_list_like(func) and not is_dict_like(func): - func = cast(list[AggFuncTypeBase], func) + func = cast("list[AggFuncTypeBase]", func) # Convert func equivalent dict if is_series: func = {com.get_callable_name(v) or v: v for v in func} else: - func = {col: func for col in obj} + func = dict.fromkeys(obj, func) if is_dict_like(func): - func = cast(AggFuncTypeDict, func) + func = cast("AggFuncTypeDict", func) return self.transform_dict_like(func) # func is either str or callable - func = cast(AggFuncTypeBase, func) + func = cast("AggFuncTypeBase", func) try: result = self.transform_str_or_callable(func) except TypeError: @@ -434,7 +434,7 @@ def compute_list_like( Data for result. When aggregating with a Series, this can contain any Python objects. """ - func = cast(list[AggFuncTypeBase], self.func) + func = cast("list[AggFuncTypeBase]", self.func) obj = self.obj results = [] @@ -541,7 +541,7 @@ def compute_dict_like( obj = self.obj is_groupby = isinstance(obj, (DataFrameGroupBy, SeriesGroupBy)) - func = cast(AggFuncTypeDict, self.func) + func = cast("AggFuncTypeDict", self.func) func = self.normalize_dictlike_arg(op_name, selected_obj, func) is_non_unique_col = ( @@ -666,7 +666,7 @@ def apply_str(self) -> DataFrame | Series: result: Series or DataFrame """ # Caller is responsible for checking isinstance(self.f, str) - func = cast(str, self.func) + func = cast("str", self.func) obj = self.obj @@ -1262,7 +1262,7 @@ def numba_func(values, col_names, df_index, *args): return numba_func def apply_with_numba(self) -> dict[int, Any]: - func = cast(Callable, self.func) + func = cast("Callable", self.func) args, kwargs = prepare_function_arguments( func, self.args, self.kwargs, num_required_args=1 ) @@ -1404,7 +1404,7 @@ def numba_func(values, col_names_index, index, *args): return numba_func def apply_with_numba(self) -> dict[int, Any]: - func = cast(Callable, self.func) + func = cast("Callable", self.func) args, kwargs = prepare_function_arguments( func, self.args, self.kwargs, num_required_args=1 ) @@ -1551,7 +1551,7 @@ def apply_compat(self): def apply_standard(self) -> DataFrame | Series: # caller is responsible for ensuring that f is Callable - func = cast(Callable, self.func) + func = cast("Callable", self.func) obj = self.obj if isinstance(func, np.ufunc): diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index 4e6f20e6ad3dd..6616758d4425f 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -85,7 +85,7 @@ def method(self, *args, **kwargs): order = "F" if flags.f_contiguous else "C" return result.reshape(self.shape, order=order) - return cast(F, method) + return cast("F", method) # error: Definition of "delete/ravel/T/repeat/copy" in base class "NDArrayBacked" diff --git a/pandas/core/arrays/arrow/accessors.py b/pandas/core/arrays/arrow/accessors.py index b220a94d032b5..674776fcd21d4 100644 --- a/pandas/core/arrays/arrow/accessors.py +++ b/pandas/core/arrays/arrow/accessors.py @@ -445,7 +445,7 @@ def get_name( while level_name_or_index: # we need the cast, otherwise mypy complains about # getting ints, bytes, or str here, which isn't possible. - level_name_or_index = cast(list, level_name_or_index) + level_name_or_index = cast("list", level_name_or_index) name_or_index = level_name_or_index.pop() name = get_name(name_or_index, selected) selected = selected.type.field(selected.type.get_field_index(name)) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 9295cf7873d98..192ae78b99ce4 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -1231,7 +1231,7 @@ def factorize( uniques = type(self)(combined.dictionary) if pa_version_under11p0 and pa.types.is_duration(pa_type): - uniques = cast(ArrowExtensionArray, uniques.astype(self.dtype)) + uniques = cast("ArrowExtensionArray", uniques.astype(self.dtype)) return indices, uniques def reshape(self, *args, **kwargs): @@ -1991,7 +1991,7 @@ def __setitem__(self, key, value) -> None: elif is_integer(key): # fast path - key = cast(int, key) + key = cast("int", key) n = len(self) if key < 0: key += n diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 42be07e03bad8..a19f492bd7091 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -1509,7 +1509,7 @@ def equals(self, other: object) -> bool: """ if type(self) != type(other): return False - other = cast(ExtensionArray, other) + other = cast("ExtensionArray", other) if self.dtype != other.dtype: return False elif len(self) != len(other): diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 647530151d5f6..46c13794a74d6 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2478,7 +2478,7 @@ def _mode(self, dropna: bool = True) -> Categorical: mask = self.isna() res_codes, _ = algorithms.mode(codes, mask=mask) - res_codes = cast(np.ndarray, res_codes) + res_codes = cast("np.ndarray", res_codes) assert res_codes.dtype == codes.dtype res = self._from_backing_data(res_codes) return res diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index b27bf19f2f593..c4844b6a905dd 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -191,7 +191,7 @@ def new_meth(self, *args, **kwargs): res_i8 = result.view("i8") return self._from_backing_data(res_i8) - return cast(F, new_meth) + return cast("F", new_meth) # error: Definition of "_concat_same_type" in base class "NDArrayBacked" is @@ -391,7 +391,7 @@ def __getitem__(self, key: PositionalIndexer2D) -> Self | DTScalarOrNaT: return result else: # At this point we know the result is an array. - result = cast(Self, result) + result = cast("Self", result) result._freq = self._get_getitem_freq(key) return result @@ -990,7 +990,7 @@ def _cmp_method(self, other, op): return result if not isinstance(self.dtype, PeriodDtype): - self = cast(TimelikeOps, self) + self = cast("TimelikeOps", self) if self._creso != other._creso: if not isinstance(other, type(self)): # i.e. Timedelta/Timestamp, cast to ndarray and let @@ -1637,7 +1637,7 @@ def _mode(self, dropna: bool = True): i8modes, _ = algorithms.mode(self.view("i8"), mask=mask) npmodes = i8modes.view(self._ndarray.dtype) - npmodes = cast(np.ndarray, npmodes) + npmodes = cast("np.ndarray", npmodes) return self._from_backing_data(npmodes) # ------------------------------------------------------------------ @@ -2198,7 +2198,7 @@ def _round(self, freq, mode, ambiguous, nonexistent): ) values = self.view("i8") - values = cast(np.ndarray, values) + values = cast("np.ndarray", values) nanos = get_unit_for_round(freq, self._creso) if nanos == 0: # GH 52761 diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index df40c9c11b117..3b9ad0aa44986 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -508,7 +508,7 @@ def _generate_range( # Nanosecond-granularity timestamps aren't always correctly # representable with doubles, so we limit the range that we # pass to np.linspace as much as possible - periods = cast(int, periods) + periods = cast("int", periods) i8values = ( np.linspace(0, end._value - start._value, periods, dtype="int64") + start._value @@ -2430,7 +2430,7 @@ def _sequence_to_dt64( if data_dtype == object or is_string_dtype(data_dtype): # TODO: We do not have tests specific to string-dtypes, # also complex or categorical or other extension - data = cast(np.ndarray, data) + data = cast("np.ndarray", data) copy = False if lib.infer_dtype(data, skipna=False) == "integer": # Much more performant than going through array_to_datetime @@ -2475,7 +2475,7 @@ def _sequence_to_dt64( # so we need to handle these types. if isinstance(data_dtype, DatetimeTZDtype): # DatetimeArray -> ndarray - data = cast(DatetimeArray, data) + data = cast("DatetimeArray", data) tz = _maybe_infer_tz(tz, data.tz) result = data._ndarray @@ -2484,7 +2484,7 @@ def _sequence_to_dt64( if isinstance(data, DatetimeArray): data = data._ndarray - data = cast(np.ndarray, data) + data = cast("np.ndarray", data) result, copy = _construct_from_dt64_naive( data, tz=tz, copy=copy, ambiguous=ambiguous ) @@ -2495,7 +2495,7 @@ def _sequence_to_dt64( if data.dtype != INT64_DTYPE: data = data.astype(np.int64, copy=False) copy = False - data = cast(np.ndarray, data) + data = cast("np.ndarray", data) result = data.view(out_dtype) if copy: @@ -2760,7 +2760,7 @@ def _validate_dt64_dtype(dtype): # Without this, things like adding an array of timedeltas and # a tz-aware Timestamp (with a tz specific to its datetime) will # be incorrect(ish?) for the array as a whole - dtype = cast(DatetimeTZDtype, dtype) + dtype = cast("DatetimeTZDtype", dtype) dtype = DatetimeTZDtype( unit=dtype.unit, tz=timezones.tz_standardize(dtype.tz) ) @@ -2985,8 +2985,8 @@ def _generate_range( # argument type "None" start = end - (periods - 1) * offset # type: ignore[operator] - start = cast(Timestamp, start) - end = cast(Timestamp, end) + start = cast("Timestamp", start) + end = cast("Timestamp", end) cur = start if offset.n >= 0: diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 07c875337e4f6..670079f90431e 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -148,7 +148,7 @@ def _from_sequence(cls, scalars, *, dtype=None, copy: bool = False) -> Self: @classmethod @doc(ExtensionArray._empty) def _empty(cls, shape: Shape, dtype: ExtensionDtype) -> Self: - dtype = cast(BaseMaskedDtype, dtype) + dtype = cast("BaseMaskedDtype", dtype) values: np.ndarray = np.empty(shape, dtype=dtype.type) values.fill(dtype._internal_fill_value) mask = np.ones(shape, dtype=bool) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index ae92e17332c76..8f595f3bb4425 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -245,7 +245,7 @@ def __init__(self, values, dtype: Dtype | None = None, copy: bool = False) -> No values = np.array(values, dtype="int64", copy=copy) if dtype is None: raise ValueError("dtype is not specified and cannot be inferred") - dtype = cast(PeriodDtype, dtype) + dtype = cast("PeriodDtype", dtype) NDArrayBacked.__init__(self, values, dtype) # error: Signature of "_simple_new" incompatible with supertype "NDArrayBacked" diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 137dbb6e4d139..4722e2bb72f5f 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -1031,7 +1031,7 @@ def __getitem__( if com.is_bool_indexer(key): # mypy doesn't know we have an array here - key = cast(np.ndarray, key) + key = cast("np.ndarray", key) return self.take(np.arange(len(key), dtype=np.int32)[key]) elif hasattr(key, "__len__"): return self.take(key) @@ -1302,7 +1302,7 @@ def astype(self, dtype: AstypeArg | None = None, copy: bool = True): dtype = self.dtype.update_dtype(dtype) subtype = pandas_dtype(dtype._subtype_with_str) - subtype = cast(np.dtype, subtype) # ensured by update_dtype + subtype = cast("np.dtype", subtype) # ensured by update_dtype values = ensure_wrapped_if_datetimelike(self.sp_values) sp_values = astype_array(values, subtype, copy=copy) sp_values = np.asarray(sp_values) diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index 7227ea77ca433..f6bf7d0a10d0c 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -194,7 +194,7 @@ def __init__( elif na_value is not libmissing.NA: raise ValueError(f"'na_value' must be np.nan or pd.NA, got {na_value}") - self.storage = cast(str, storage) + self.storage = cast("str", storage) self._na_value = na_value def __repr__(self) -> str: @@ -449,7 +449,7 @@ def _str_map( # error: Argument 1 to "dtype" has incompatible type # "Union[ExtensionDtype, str, dtype[Any], Type[object]]"; expected # "Type[object]" - dtype=np.dtype(cast(type, dtype)), + dtype=np.dtype(cast("type", dtype)), ) if not na_value_is_na: @@ -520,7 +520,7 @@ def _str_map_nan_semantics( mask.view("uint8"), convert=False, na_value=na_value, - dtype=np.dtype(cast(type, dtype)), + dtype=np.dtype(cast("type", dtype)), ) if na_value_is_na and is_integer_dtype(dtype) and mask.any(): # TODO: we could alternatively do this check before map_infer_mask diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index d35083fd892a8..a39d64429d162 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -281,7 +281,7 @@ def isin(self, values: ArrayLike) -> npt.NDArray[np.bool_]: ] # short-circuit to return all False array. - if not len(value_set): + if not value_set: return np.zeros(len(self), dtype=bool) result = pc.is_in( diff --git a/pandas/core/base.py b/pandas/core/base.py index 6cc28d4e46634..d0b0c1249e4ee 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -1268,7 +1268,7 @@ def _memory_usage(self, deep: bool = False) -> int: v = self.array.nbytes if deep and is_object_dtype(self.dtype) and not PYPY: - values = cast(np.ndarray, self._values) + values = cast("np.ndarray", self._values) v += lib.memory_usage_of_objects(values) return v diff --git a/pandas/core/common.py b/pandas/core/common.py index 75f8a56aac5db..36cfa2cd321bb 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -308,7 +308,7 @@ def maybe_iterable_to_list(obj: Iterable[T] | T) -> Collection[T] | T: """ if isinstance(obj, abc.Iterable) and not isinstance(obj, abc.Sized): return list(obj) - obj = cast(Collection, obj) + obj = cast("Collection", obj) return obj diff --git a/pandas/core/construction.py b/pandas/core/construction.py index ada492787a179..8151a35efe57b 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -347,7 +347,7 @@ def array( return result.copy() return result - data = cast(np.ndarray, data) + data = cast("np.ndarray", data) result = ensure_wrapped_if_datetimelike(data) if result is not data: result = cast("DatetimeArray | TimedeltaArray", result) @@ -518,7 +518,7 @@ def sanitize_masked_array(data: ma.MaskedArray) -> np.ndarray: mask = ma.getmaskarray(data) if mask.any(): dtype, fill_value = maybe_promote(data.dtype, np.nan) - dtype = cast(np.dtype, dtype) + dtype = cast("np.dtype", dtype) data = ma.asarray(data.astype(dtype, copy=True)) data.soften_mask() # set hardmask False if it was True data[mask] = fill_value @@ -658,14 +658,14 @@ def sanitize_array( else: subarr = maybe_convert_platform(data) if subarr.dtype == object: - subarr = cast(np.ndarray, subarr) + subarr = cast("np.ndarray", subarr) subarr = maybe_infer_to_datetimelike(subarr) subarr = _sanitize_ndim(subarr, data, dtype, index, allow_2d=allow_2d) if isinstance(subarr, np.ndarray): # at this point we should have dtype be None or subarr.dtype == dtype - dtype = cast(np.dtype, dtype) + dtype = cast("np.dtype", dtype) subarr = _sanitize_str_dtypes(subarr, data, dtype, copy) return subarr @@ -802,7 +802,7 @@ def _try_cast( elif dtype.kind == "U": # TODO: test cases with arr.dtype.kind in "mM" if is_ndarray: - arr = cast(np.ndarray, arr) + arr = cast("np.ndarray", arr) shape = arr.shape if arr.ndim > 1: arr = arr.ravel() @@ -814,7 +814,7 @@ def _try_cast( elif dtype.kind in "mM": if is_ndarray: - arr = cast(np.ndarray, arr) + arr = cast("np.ndarray", arr) if arr.ndim == 2 and arr.shape[1] == 1: # GH#60081: DataFrame Constructor converts 1D data to array of # shape (N, 1), but maybe_cast_to_datetime assumes 1D input diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index dae04ba6244d4..c0687c50e5068 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -134,7 +134,7 @@ def maybe_convert_platform( arr = values if arr.dtype == _dtype_obj: - arr = cast(np.ndarray, arr) + arr = cast("np.ndarray", arr) arr = lib.maybe_convert_objects(arr) return arr @@ -305,11 +305,11 @@ def maybe_downcast_to_dtype(result: ArrayLike, dtype: str | np.dtype) -> ArrayLi elif dtype.kind == "m" and result.dtype == _dtype_obj: # test_where_downcast_to_td64 - result = cast(np.ndarray, result) + result = cast("np.ndarray", result) result = array_to_timedelta64(result) elif dtype == np.dtype("M8[ns]") and result.dtype == _dtype_obj: - result = cast(np.ndarray, result) + result = cast("np.ndarray", result) return np.asarray(maybe_cast_to_datetime(result, dtype=dtype)) return result diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 570074e047da6..8746ea0fc28e9 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -346,7 +346,7 @@ def _from_values_or_dtype( # ordered=None. dtype = CategoricalDtype(categories, ordered) - return cast(CategoricalDtype, dtype) + return cast("CategoricalDtype", dtype) @classmethod def construct_from_string(cls, string: str_type) -> CategoricalDtype: @@ -614,7 +614,7 @@ def update_dtype(self, dtype: str_type | CategoricalDtype) -> CategoricalDtype: ) else: # from here on, dtype is a CategoricalDtype - dtype = cast(CategoricalDtype, dtype) + dtype = cast("CategoricalDtype", dtype) # update categories/ordered unless they've been explicitly passed as None if ( @@ -968,7 +968,9 @@ def __setstate__(self, state) -> None: def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None: if all(isinstance(t, DatetimeTZDtype) and t.tz == self.tz for t in dtypes): - np_dtype = np.max([cast(DatetimeTZDtype, t).base for t in [self, *dtypes]]) + np_dtype = np.max( + [cast("DatetimeTZDtype", t).base for t in [self, *dtypes]] + ) unit = np.datetime_data(np_dtype)[0] return type(self)(unit=unit, tz=self.tz) return super()._get_common_dtype(dtypes) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 6158e19737185..37ac677c6baf8 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -787,7 +787,7 @@ def __init__( elif isinstance(data, (np.ndarray, Series, Index, ExtensionArray)): if data.dtype.names: # i.e. numpy structured array - data = cast(np.ndarray, data) + data = cast("np.ndarray", data) mgr = rec_array_to_mgr( data, index, @@ -3814,7 +3814,7 @@ def transpose( ) new_values = transpose_homogeneous_masked_arrays( - cast(Sequence[BaseMaskedArray], self._iter_column_arrays()) + cast("Sequence[BaseMaskedArray]", self._iter_column_arrays()) ) elif isinstance(first_dtype, ArrowDtype): # We have arrow EAs with the same dtype. We can transpose faster. @@ -3824,7 +3824,7 @@ def transpose( ) new_values = transpose_homogeneous_pyarrow( - cast(Sequence[ArrowExtensionArray], self._iter_column_arrays()) + cast("Sequence[ArrowExtensionArray]", self._iter_column_arrays()) ) else: # We have other EAs with the same dtype. We preserve dtype in transpose. @@ -4409,7 +4409,7 @@ def _set_value( try: if takeable: icol = col - iindex = cast(int, index) + iindex = cast("int", index) else: icol = self.columns.get_loc(col) iindex = self.index.get_loc(index) @@ -5743,7 +5743,7 @@ def shift( axis = self._get_axis_number(axis) if is_list_like(periods): - periods = cast(Sequence, periods) + periods = cast("Sequence", periods) if axis == 1: raise ValueError( "If `periods` contains multiple shifts, `axis` cannot be 1." @@ -5758,7 +5758,7 @@ def shift( raise TypeError( f"Periods must be integer, but {period} is {type(period)}." ) - period = cast(int, period) + period = cast("int", period) shifted_dataframes.append( super() .shift(periods=period, freq=freq, axis=axis, fill_value=fill_value) @@ -5767,7 +5767,7 @@ def shift( return concat(shifted_dataframes, axis=1) elif suffix: raise ValueError("Cannot specify `suffix` if `periods` is an int.") - periods = cast(int, periods) + periods = cast("int", periods) ncols = len(self.columns) if axis == 1 and periods != 0 and ncols > 0 and freq is None: @@ -6548,7 +6548,7 @@ def dropna( if subset is not None: # subset needs to be list if not is_list_like(subset): - subset = [cast(Hashable, subset)] + subset = [cast("Hashable", subset)] ax = self._get_axis(agg_axis) indices = ax.get_indexer_for(subset) check = indices == -1 @@ -6828,7 +6828,7 @@ def f(vals) -> tuple[np.ndarray, int]: subset = (subset,) # needed for mypy since can't narrow types using np.iterable - subset = cast(Sequence, subset) + subset = cast("Sequence", subset) # Verify all columns in subset exist in the queried dataframe # Otherwise, raise a KeyError, same as if you try to __getitem__ with a @@ -10511,7 +10511,7 @@ def apply( # one axis is empty if not all(self.shape): - func = cast(Callable, func) + func = cast("Callable", func) try: if axis == 0: r = func(Series([], dtype=np.float64), *args, **kwargs) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 6a45ef9325bec..5fbb8ef24e566 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1447,7 +1447,7 @@ def equals(self, other: object) -> bool: """ if not (isinstance(other, type(self)) or isinstance(self, type(other))): return False - other = cast(NDFrame, other) + other = cast("NDFrame", other) return self._mgr.equals(other._mgr) # ------------------------------------------------------------------------- @@ -2132,7 +2132,7 @@ def _repr_data_resource_(self): data = self.head(config.get_option("display.max_rows")) as_json = data.to_json(orient="table") - as_json = cast(str, as_json) + as_json = cast("str", as_json) return loads(as_json, object_pairs_hook=collections.OrderedDict) # ---------------------------------------------------------------------- @@ -6472,7 +6472,7 @@ def astype( result.columns = self.columns result = result.__finalize__(self, method="astype") # https://github.com/python/mypy/issues/8354 - return cast(Self, result) + return cast("Self", result) @final def copy(self, deep: bool = True) -> Self: @@ -9520,7 +9520,7 @@ def align( else: # pragma: no cover raise TypeError(f"unsupported type: {type(other)}") - right = cast(NDFrameT, _right) + right = cast("NDFrameT", _right) if self.ndim == 1 or axis == 0: # If we are aligning timezone-aware DatetimeIndexes and the timezones # do not match, convert both to UTC. @@ -9705,7 +9705,7 @@ def _where( # CoW: Make sure reference is not kept alive if cond.ndim == 1 and self.ndim == 2: cond = cond._constructor_expanddim( - {i: cond for i in range(len(self.columns))}, + dict.fromkeys(range(len(self.columns)), cond), copy=False, ) cond.columns = self.columns @@ -10243,7 +10243,7 @@ def shift( return self.to_frame().shift( periods=periods, freq=freq, axis=axis, fill_value=fill_value ) - periods = cast(int, periods) + periods = cast("int", periods) if freq is None: # when freq is None, data is shifted, index is not diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 1251403db6ff3..92e7c850f3619 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1016,7 +1016,7 @@ def value_counts( if isinstance(lab.dtype, IntervalDtype): # TODO: should we do this inside II? - lab_interval = cast(Interval, lab) + lab_interval = cast("Interval", lab) sorter = np.lexsort((lab_interval.left, lab_interval.right, ids)) else: @@ -1930,9 +1930,9 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs) elif relabeling: # this should be the only (non-raising) case with relabeling # used reordered index of columns - result = cast(DataFrame, result) + result = cast("DataFrame", result) result = result.iloc[:, order] - result = cast(DataFrame, result) + result = cast("DataFrame", result) # error: Incompatible types in assignment (expression has type # "Optional[List[str]]", variable has type # "Union[Union[Union[ExtensionArray, ndarray[Any, Any]], @@ -1980,7 +1980,7 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs) else: # GH#32040, GH#35246 # e.g. test_groupby_as_index_select_column_sum_empty_df - result = cast(DataFrame, result) + result = cast("DataFrame", result) result.columns = self._obj_with_exclusions.columns.copy() if not self.as_index: @@ -2505,7 +2505,7 @@ def _apply_to_column_groupbys(self, func) -> DataFrame: ) results = [func(sgb) for sgb in sgbs] - if not len(results): + if not results: # concat would raise res_df = DataFrame([], columns=columns, index=self._grouper.result_index) else: diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index f9438b348c140..f01f953543e5b 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1827,7 +1827,7 @@ def _reduction_kernel_transform( # GH#49834 - result needs groups in the index for # _wrap_transform_fast_result if func in ["idxmin", "idxmax"]: - func = cast(Literal["idxmin", "idxmax"], func) + func = cast("Literal['idxmin', 'idxmax']", func) result = self._idxmax_idxmin(func, True, *args, **kwargs) else: if engine is not None: @@ -2659,7 +2659,7 @@ def _value_counts( observed=self.observed, dropna=self.dropna, ) - result_series = cast(Series, gb.size()) + result_series = cast("Series", gb.size()) result_series.name = name if sort: @@ -4206,7 +4206,7 @@ def _nth( # old behaviour, but with all and any support for DataFrames. # modified in GH 7559 to have better perf - n = cast(int, n) + n = cast("int", n) dropped = self._selected_obj.dropna(how=dropna, axis=0) # get a new grouper for our dropped obj @@ -5029,7 +5029,7 @@ def shift( goldfish 5.0 8.0 """ if is_list_like(periods): - periods = cast(Sequence, periods) + periods = cast("Sequence", periods) if len(periods) == 0: raise ValueError("If `periods` is an iterable, it cannot be empty.") from pandas.core.reshape.concat import concat @@ -5042,7 +5042,7 @@ def shift( ) if suffix: raise ValueError("Cannot specify `suffix` if `periods` is an int.") - periods = [cast(int, periods)] + periods = [cast("int", periods)] add_suffix = False shifted_dataframes = [] @@ -5051,7 +5051,7 @@ def shift( raise TypeError( f"Periods must be integer, but {period} is {type(period)}." ) - period = cast(int, period) + period = cast("int", period) if freq is not None: f = lambda x: x.shift( period, @@ -5081,11 +5081,11 @@ def shift( if add_suffix: if isinstance(shifted, Series): - shifted = cast(NDFrameT, shifted.to_frame()) + shifted = cast("NDFrameT", shifted.to_frame()) shifted = shifted.add_suffix( f"{suffix}_{period}" if suffix else f"_{period}" ) - shifted_dataframes.append(cast(Union[Series, DataFrame], shifted)) + shifted_dataframes.append(cast("Union[Series, DataFrame]", shifted)) return ( shifted_dataframes[0] @@ -5175,8 +5175,8 @@ def diff( shifted = shifted.astype("float32") else: to_coerce = [c for c, dtype in obj.dtypes.items() if dtype in dtypes_to_f32] - if len(to_coerce): - shifted = shifted.astype({c: "float32" for c in to_coerce}) + if to_coerce: + shifted = shifted.astype(dict.fromkeys(to_coerce, "float32")) return obj - shifted @@ -5648,7 +5648,7 @@ def _insert_quantile_level(idx: Index, qs: npt.NDArray[np.float64]) -> MultiInde lev_codes = coerce_indexer_dtype(lev_codes, lev) if idx._is_multi: - idx = cast(MultiIndex, idx) + idx = cast("MultiIndex", idx) levels = list(idx.levels) + [lev] codes = [np.repeat(x, nqs) for x in idx.codes] + [np.tile(lev_codes, len(idx))] mi = MultiIndex(levels=levels, codes=codes, names=idx.names + [None]) diff --git a/pandas/core/groupby/indexing.py b/pandas/core/groupby/indexing.py index c658f625d5ea9..42f2ad16419c3 100644 --- a/pandas/core/groupby/indexing.py +++ b/pandas/core/groupby/indexing.py @@ -114,7 +114,7 @@ def _positional_selector(self) -> GroupByPositionalSelector: 4 b 5 """ if TYPE_CHECKING: - groupby_self = cast(groupby.GroupBy, self) + groupby_self = cast("groupby.GroupBy", self) else: groupby_self = self @@ -125,15 +125,15 @@ def _make_mask_from_positional_indexer( arg: PositionalIndexer | tuple, ) -> np.ndarray: if is_list_like(arg): - if all(is_integer(i) for i in cast(Iterable, arg)): - mask = self._make_mask_from_list(cast(Iterable[int], arg)) + if all(is_integer(i) for i in cast("Iterable", arg)): + mask = self._make_mask_from_list(cast("Iterable[int]", arg)) else: - mask = self._make_mask_from_tuple(cast(tuple, arg)) + mask = self._make_mask_from_tuple(cast("tuple", arg)) elif isinstance(arg, slice): mask = self._make_mask_from_slice(arg) elif is_integer(arg): - mask = self._make_mask_from_int(cast(int, arg)) + mask = self._make_mask_from_int(cast("int", arg)) else: raise TypeError( f"Invalid index {type(arg)}. " @@ -147,7 +147,7 @@ def _make_mask_from_positional_indexer( else: mask = self._ascending_count < 0 - return cast(np.ndarray, mask) + return cast("np.ndarray", mask) def _make_mask_from_int(self, arg: int) -> np.ndarray: if arg >= 0: @@ -174,7 +174,7 @@ def _make_mask_from_tuple(self, args: tuple) -> bool | np.ndarray: for arg in args: if is_integer(arg): - mask |= self._make_mask_from_int(cast(int, arg)) + mask |= self._make_mask_from_int(cast("int", arg)) elif isinstance(arg, slice): mask |= self._make_mask_from_slice(arg) else: @@ -229,7 +229,7 @@ def _make_mask_from_slice(self, arg: slice) -> bool | np.ndarray: @cache_readonly def _ascending_count(self) -> np.ndarray: if TYPE_CHECKING: - groupby_self = cast(groupby.GroupBy, self) + groupby_self = cast("groupby.GroupBy", self) else: groupby_self = self @@ -238,7 +238,7 @@ def _ascending_count(self) -> np.ndarray: @cache_readonly def _descending_count(self) -> np.ndarray: if TYPE_CHECKING: - groupby_self = cast(groupby.GroupBy, self) + groupby_self = cast("groupby.GroupBy", self) else: groupby_self = self diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index c404323a1168c..5f818b9e01c04 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -221,17 +221,17 @@ def to_pytimedelta(self): FutureWarning, stacklevel=find_stack_level(), ) - return cast(ArrowExtensionArray, self._parent.array)._dt_to_pytimedelta() + return cast("ArrowExtensionArray", self._parent.array)._dt_to_pytimedelta() def to_pydatetime(self) -> Series: # GH#20306 - return cast(ArrowExtensionArray, self._parent.array)._dt_to_pydatetime() + return cast("ArrowExtensionArray", self._parent.array)._dt_to_pydatetime() def isocalendar(self) -> DataFrame: from pandas import DataFrame result = ( - cast(ArrowExtensionArray, self._parent.array) + cast("ArrowExtensionArray", self._parent.array) ._dt_isocalendar() ._pa_array.combine_chunks() ) diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py index 058e584336905..2f7a35646d964 100644 --- a/pandas/core/indexes/api.py +++ b/pandas/core/indexes/api.py @@ -171,7 +171,7 @@ def safe_sort_index(index: Index) -> Index: if isinstance(array_sorted, Index): return array_sorted - array_sorted = cast(np.ndarray, array_sorted) + array_sorted = cast("np.ndarray", array_sorted) if isinstance(index, MultiIndex): index = MultiIndex.from_tuples(array_sorted, names=index.names) else: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index ff3879018674e..7322d1ec8e894 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -288,7 +288,7 @@ def join( ridx = ensure_platform_int(ridx) return join_index, lidx, ridx - return cast(F, join) + return cast("F", join) def _new_Index(cls, d): @@ -862,7 +862,7 @@ def _engine( elif self._engine_type is libindex.ObjectEngine: return libindex.ExtensionEngine(target_values) - target_values = cast(np.ndarray, target_values) + target_values = cast("np.ndarray", target_values) # to avoid a reference cycle, bind `target_values` to a local variable, so # `self` is not passed into the lambda. if target_values.dtype == bool: @@ -1472,7 +1472,7 @@ def _get_level_names(self) -> range | Sequence[Hashable]: def _mpl_repr(self) -> np.ndarray: # how to represent ourselves to matplotlib if isinstance(self.dtype, np.dtype) and self.dtype.kind != "M": - return cast(np.ndarray, self.values) + return cast("np.ndarray", self.values) return self.astype(object, copy=False)._values _default_na_rep = "NaN" @@ -4454,7 +4454,7 @@ def _join_empty( ridx: np.ndarray | None if len(other): - how = cast(JoinHow, {"left": "right", "right": "left"}.get(how, how)) + how = cast("JoinHow", {"left": "right", "right": "left"}.get(how, how)) join_index, ridx, lidx = other._join_empty(self, how, sort) elif how in ["left", "outer"]: if sort and not self.is_monotonic_increasing: @@ -4733,7 +4733,7 @@ def _get_leaf_sorter(labels: list[np.ndarray]) -> npt.NDArray[np.intp]: if keep_order: # just drop missing values. o.w. keep order left_indexer = np.arange(len(left), dtype=np.intp) - left_indexer = cast(np.ndarray, left_indexer) + left_indexer = cast("np.ndarray", left_indexer) mask = new_lev_codes != -1 if not mask.all(): new_codes = [lab[mask] for lab in new_codes] @@ -5493,7 +5493,7 @@ def equals(self, other: Any) -> bool: if not isinstance(other, type(self)): return False - earr = cast(ExtensionArray, self._data) + earr = cast("ExtensionArray", self._data) return earr.equals(other._data) if isinstance(other.dtype, ExtensionDtype): @@ -5794,7 +5794,7 @@ def sort_values( items=self, ascending=ascending, na_position=na_position, key=key ) else: - idx = cast(Index, ensure_key_mapped(self, key)) + idx = cast("Index", ensure_key_mapped(self, key)) _as = idx.argsort(na_position=na_position) if not ascending: _as = _as[::-1] diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index d20a84449fb85..98d0c01f05ad0 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -246,7 +246,7 @@ def _is_dtype_compat(self, other: Index) -> Categorical: """ if isinstance(other.dtype, CategoricalDtype): cat = extract_array(other) - cat = cast(Categorical, cat) + cat = cast("Categorical", cat) if not cat._categories_match_up_to_permutation(self._values): raise TypeError( "categories must match existing categories when appending" diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 8b316de30662c..4c1e2f901a00f 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -534,7 +534,7 @@ def inferred_freq(self) -> str | None: def _as_range_index(self) -> RangeIndex: # Convert our i8 representations to RangeIndex # Caller is responsible for checking isinstance(self.freq, Tick) - freq = cast(Tick, self.freq) + freq = cast("Tick", self.freq) tick = Timedelta(freq).as_unit(self.unit)._value rng = range(self[0]._value, self[-1]._value + tick, tick) return RangeIndex(rng) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 29b34f560ab2e..24726a67bdc12 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -194,7 +194,7 @@ def new_meth(self_or_cls, *args, **kwargs): return meth(self_or_cls, *args, **kwargs) - return cast(F, new_meth) + return cast("F", new_meth) @set_module("pandas") @@ -560,7 +560,7 @@ def from_tuples( raise TypeError("Input must be a list / sequence of tuple-likes.") if is_iterator(tuples): tuples = list(tuples) - tuples = cast(Collection[tuple[Hashable, ...]], tuples) + tuples = cast("Collection[tuple[Hashable, ...]]", tuples) # handling the empty tuple cases if len(tuples) and all(isinstance(e, tuple) and not e for e in tuples): @@ -590,7 +590,7 @@ def from_tuples( arrays = list(lib.to_object_array_tuples(tuples).T) else: arrs = zip_longest(*tuples, fillvalue=np.nan) - arrays = cast(list[Sequence[Hashable]], arrs) + arrays = cast("list[Sequence[Hashable]]", arrs) return cls.from_arrays(arrays, sortorder=sortorder, names=names) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 2db50bbbdfa37..69cbd994f9a2b 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -1094,7 +1094,7 @@ def _concat(self, indexes: list[Index], name: Hashable) -> Index: elif len(indexes) == 1: return indexes[0] - rng_indexes = cast(list[RangeIndex], indexes) + rng_indexes = cast("list[RangeIndex]", indexes) start = step = next_ = None diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index bbbcc4da9fb39..f682eb5a7330b 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1062,7 +1062,7 @@ def _getitem_lowerdim(self, tup: tuple): # is equivalent. # (see the other place where we call _handle_lowerdim_multi_index_axis0) with suppress(IndexingError): - return cast(_LocIndexer, self)._handle_lowerdim_multi_index_axis0(tup) + return cast("_LocIndexer", self)._handle_lowerdim_multi_index_axis0(tup) tup = self._validate_key_length(tup) @@ -1129,7 +1129,7 @@ def _contains_slice(x: object) -> bool: # DataFrame, IndexingError is not raised when slice(None,None,None) # with one row. with suppress(IndexingError): - return cast(_LocIndexer, self)._handle_lowerdim_multi_index_axis0( + return cast("_LocIndexer", self)._handle_lowerdim_multi_index_axis0( tup ) elif isinstance(self.obj, ABCSeries) and any( diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index dc64da35e9725..a77def02fe033 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -805,7 +805,7 @@ def replace_list( for x, y in zip(src_list, dest_list) if (self._can_hold_element(x) or (self.dtype == "string" and is_re(x))) ] - if not len(pairs): + if not pairs: return [self.copy(deep=False)] src_len = len(pairs) - 1 @@ -817,7 +817,7 @@ def replace_list( masks: Iterable[npt.NDArray[np.bool_]] = ( extract_bool_array( cast( - ArrayLike, + "ArrayLike", compare_or_regex_search( values, s[0], regex=regex, mask=na_mask ), @@ -1098,7 +1098,7 @@ def setitem(self, indexer, value) -> Block: value = self._standardize_fill_value(value) - values = cast(np.ndarray, self.values) + values = cast("np.ndarray", self.values) if self.ndim == 2: values = values.T @@ -1124,7 +1124,7 @@ def setitem(self, indexer, value) -> Block: casted = setitem_datetimelike_compat(values, len(vi), casted) self = self._maybe_copy(inplace=True) - values = cast(np.ndarray, self.values.T) + values = cast("np.ndarray", self.values.T) if isinstance(casted, np.ndarray) and casted.ndim == 1 and len(casted) == 1: # NumPy 1.25 deprecation: https://github.com/numpy/numpy/pull/10615 casted = casted[0, ...] @@ -1155,7 +1155,7 @@ def putmask(self, mask, new) -> list[Block]: List[Block] """ orig_mask = mask - values = cast(np.ndarray, self.values) + values = cast("np.ndarray", self.values) mask, noop = validate_putmask(values.T, mask) assert not isinstance(new, (ABCIndex, ABCSeries, ABCDataFrame)) @@ -1172,7 +1172,7 @@ def putmask(self, mask, new) -> list[Block]: casted = np_can_hold_element(values.dtype, new) self = self._maybe_copy(inplace=True) - values = cast(np.ndarray, self.values) + values = cast("np.ndarray", self.values) putmask_without_repeat(values.T, mask, casted) return [self] @@ -1226,7 +1226,7 @@ def where(self, other, cond) -> list[Block]: cond = extract_bool_array(cond) # EABlocks override where - values = cast(np.ndarray, self.values) + values = cast("np.ndarray", self.values) orig_other = other if transpose: values = values.T @@ -1357,7 +1357,7 @@ def pad_or_backfill( # Dispatch to the NumpyExtensionArray method. # We know self.array_values is a NumpyExtensionArray bc EABlock overrides - vals = cast(NumpyExtensionArray, self.array_values) + vals = cast("NumpyExtensionArray", self.array_values) new_values = vals.T._pad_or_backfill( method=method, limit=limit, @@ -1449,7 +1449,7 @@ def shift(self, periods: int, fill_value: Any = None) -> list[Block]: return nb.shift(periods, fill_value=fill_value) else: - values = cast(np.ndarray, self.values) + values = cast("np.ndarray", self.values) new_values = shift(values, periods, axis, casted) return [self.make_block_same_class(new_values)] @@ -1525,7 +1525,7 @@ def delete(self, loc) -> list[Block]: loc = [loc] if self.ndim == 1: - values = cast(np.ndarray, self.values) + values = cast("np.ndarray", self.values) values = np.delete(values, loc) mgr_locs = self._mgr_locs.delete(loc) return [type(self)(values, placement=mgr_locs, ndim=self.ndim)] diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index 2ee7d3948a70f..af80a6cae57bd 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -361,7 +361,7 @@ def get_reindexed_values(self, empty_dtype: DtypeObj, upcasted_na) -> ArrayLike: # we want to avoid filling with np.nan if we are # using None; we already know that we are all # nulls - values = cast(np.ndarray, self.block.values) + values = cast("np.ndarray", self.block.values) if values.size and values[0, 0] is None: fill_value = None diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 69da2be0306f6..d098f8d42d3db 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -864,7 +864,7 @@ def _finalize_columns_and_data( # GH#26429 do not raise user-facing AssertionError raise ValueError(err) from err - if len(contents) and contents[0].dtype == np.object_: + if contents and contents[0].dtype == np.object_: contents = convert_object_array(contents, dtype=dtype) return contents, columns diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index a3738bb25f56c..7d0c446839fb9 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -131,7 +131,7 @@ def ensure_np_dtype(dtype: DtypeObj) -> np.dtype: # Give EAs some input on what happens here. Sparse needs this. if isinstance(dtype, SparseDtype): dtype = dtype.subtype - dtype = cast(np.dtype, dtype) + dtype = cast("np.dtype", dtype) elif isinstance(dtype, ExtensionDtype): dtype = np.dtype("object") elif dtype == np.dtype(str): @@ -1238,7 +1238,7 @@ def iset( # containing (self._blknos[loc], BlockPlacement(slice(0, 1, 1))) # Check if we can use _iset_single fastpath - loc = cast(int, loc) + loc = cast("int", loc) blkno = self.blknos[loc] blk = self.blocks[blkno] if len(blk._mgr_locs) == 1: # TODO: fastest way to check this? @@ -1298,7 +1298,7 @@ def value_getitem(placement): # Defer setting the new values to enable consolidation self._iset_split_block(blkno_l, blk_locs, refs=refs) - if len(removed_blknos): + if removed_blknos: # Remove blocks & update blknos accordingly is_deleted = np.zeros(self.nblocks, dtype=np.bool_) is_deleted[removed_blknos] = True @@ -2429,7 +2429,7 @@ def _merge_blocks( new_values = np.vstack([b.values for b in blocks]) # type: ignore[misc] else: bvals = [blk.values for blk in blocks] - bvals2 = cast(Sequence[NDArrayBackedExtensionArray], bvals) + bvals2 = cast("Sequence[NDArrayBackedExtensionArray]", bvals) new_values = bvals2[0]._concat_same_type(bvals2, axis=0) argsort = np.argsort(new_mgr_locs) @@ -2475,7 +2475,7 @@ def make_na_array(dtype: DtypeObj, shape: Shape, fill_value) -> ArrayLike: return DatetimeArray._simple_new(dt64values, dtype=dtype) elif is_1d_only_ea_dtype(dtype): - dtype = cast(ExtensionDtype, dtype) + dtype = cast("ExtensionDtype", dtype) cls = dtype.construct_array_type() missing_arr = cls._from_sequence([], dtype=dtype) diff --git a/pandas/core/methods/describe.py b/pandas/core/methods/describe.py index 944e28a9b0238..f776c49488034 100644 --- a/pandas/core/methods/describe.py +++ b/pandas/core/methods/describe.py @@ -95,7 +95,7 @@ def describe_ndframe( ) result = describer.describe(percentiles=percentiles) - return cast(NDFrameT, result) + return cast("NDFrameT", result) class NDFrameDescriberAbstract(ABC): diff --git a/pandas/core/methods/selectn.py b/pandas/core/methods/selectn.py index 02e7445f1d275..f292fe5c5bd98 100644 --- a/pandas/core/methods/selectn.py +++ b/pandas/core/methods/selectn.py @@ -197,7 +197,7 @@ def __init__(self, obj: DataFrame, n: int, keep: str, columns: IndexLabel) -> No if not is_list_like(columns) or isinstance(columns, tuple): columns = [columns] - columns = cast(Sequence[Hashable], columns) + columns = cast("Sequence[Hashable]", columns) columns = list(columns) self.columns = columns diff --git a/pandas/core/missing.py b/pandas/core/missing.py index e2fb3b9a6fc0b..073c8ecd8d047 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -421,7 +421,7 @@ def _index_to_interp_indices(index: Index, method: str) -> np.ndarray: if method == "linear": inds = xarr - inds = cast(np.ndarray, inds) + inds = cast("np.ndarray", inds) else: inds = np.asarray(xarr) @@ -884,7 +884,7 @@ def new_func( return func(values, limit=limit, limit_area=limit_area, mask=mask) - return cast(F, new_func) + return cast("F", new_func) @_datetimelike_compat diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 25fb6e6181082..e6f1c209312e4 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -94,7 +94,7 @@ def _f(*args, **kwargs): raise TypeError(e) from e raise - return cast(F, _f) + return cast("F", _f) class bottleneck_switch: @@ -150,7 +150,7 @@ def f( return result - return cast(F, f) + return cast("F", f) def _bn_ok_dtype(dtype: DtypeObj, name: str) -> bool: @@ -413,7 +413,7 @@ def new_func( return result - return cast(F, new_func) + return cast("F", new_func) def _na_for_min_count(values: np.ndarray, axis: AxisInt | None) -> Scalar | np.ndarray: @@ -478,7 +478,7 @@ def newfunc(values: np.ndarray, *, axis: AxisInt | None = None, **kwargs): return func(values, axis=axis, **kwargs) - return cast(F, newfunc) + return cast("F", newfunc) def nanany( @@ -712,7 +712,7 @@ def nanmean( the_sum = _ensure_numeric(the_sum) if axis is not None and getattr(the_sum, "ndim", False): - count = cast(np.ndarray, count) + count = cast("np.ndarray", count) with np.errstate(all="ignore"): # suppress division by zero warnings the_mean = the_sum / count @@ -898,7 +898,7 @@ def _get_counts_nanvar( d = np.nan else: # count is not narrowed by is_float check - count = cast(np.ndarray, count) + count = cast("np.ndarray", count) mask = count <= ddof if mask.any(): np.putmask(d, mask, np.nan) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 753f7fb6cea1a..6d1797348e487 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -1797,7 +1797,7 @@ def _gotitem(self, key, ndim, subset=None): new_rs = type(self)( groupby=groupby, - parent=cast(Resampler, self), + parent=cast("Resampler", self), selection=selection, ) return new_rs @@ -2308,7 +2308,7 @@ def _get_grouper( ) -> tuple[BinGrouper, NDFrameT]: # create the resampler and return our binner r = self._get_resampler(obj) - return r._grouper, cast(NDFrameT, r.obj) + return r._grouper, cast("NDFrameT", r.obj) def _get_time_bins(self, ax: DatetimeIndex): if not isinstance(ax, DatetimeIndex): @@ -2558,7 +2558,7 @@ def _set_grouper( if isinstance(ax.dtype, ArrowDtype) and ax.dtype.kind in "Mm": self._arrow_dtype = ax.dtype ax = Index( - cast(ArrowExtensionArray, ax.array)._maybe_convert_datelike_array() + cast("ArrowExtensionArray", ax.array)._maybe_convert_datelike_array() ) return obj, ax, indexer diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 09be82c59a5c6..fae33ffd116ef 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -1060,7 +1060,7 @@ def _validate_how( if how in {"left_anti", "right_anti"}: how = how.split("_")[0] # type: ignore[assignment] anti_join = True - how = cast(JoinHow | Literal["asof"], how) + how = cast("JoinHow | Literal['asof']", how) return how, anti_join def _maybe_require_matching_dtypes( @@ -1564,16 +1564,16 @@ def _get_merge_keys( lk = extract_array(lk, extract_numpy=True) rk = extract_array(rk, extract_numpy=True) if is_lkey(lk): - lk = cast(ArrayLike, lk) + lk = cast("ArrayLike", lk) left_keys.append(lk) if is_rkey(rk): - rk = cast(ArrayLike, rk) + rk = cast("ArrayLike", rk) right_keys.append(rk) join_names.append(None) # what to do? else: # Then we're either Hashable or a wrong-length arraylike, # the latter of which will raise - rk = cast(Hashable, rk) + rk = cast("Hashable", rk) if rk is not None: right_keys.append(right._get_label_or_level_values(rk)) join_names.append(rk) @@ -1585,7 +1585,7 @@ def _get_merge_keys( if not is_rkey(rk): # Then we're either Hashable or a wrong-length arraylike, # the latter of which will raise - rk = cast(Hashable, rk) + rk = cast("Hashable", rk) if rk is not None: right_keys.append(right._get_label_or_level_values(rk)) else: @@ -1594,12 +1594,12 @@ def _get_merge_keys( if lk is not None and lk == rk: # FIXME: what about other NAs? right_drop.append(rk) else: - rk = cast(ArrayLike, rk) + rk = cast("ArrayLike", rk) right_keys.append(rk) if lk is not None: # Then we're either Hashable or a wrong-length arraylike, # the latter of which will raise - lk = cast(Hashable, lk) + lk = cast("Hashable", lk) left_keys.append(left._get_label_or_level_values(lk)) join_names.append(lk) else: @@ -1610,13 +1610,13 @@ def _get_merge_keys( for k in self.left_on: if is_lkey(k): k = extract_array(k, extract_numpy=True) - k = cast(ArrayLike, k) + k = cast("ArrayLike", k) left_keys.append(k) join_names.append(None) else: # Then we're either Hashable or a wrong-length arraylike, # the latter of which will raise - k = cast(Hashable, k) + k = cast("Hashable", k) left_keys.append(left._get_label_or_level_values(k)) join_names.append(k) if isinstance(self.right.index, MultiIndex): @@ -1632,13 +1632,13 @@ def _get_merge_keys( for k in self.right_on: k = extract_array(k, extract_numpy=True) if is_rkey(k): - k = cast(ArrayLike, k) + k = cast("ArrayLike", k) right_keys.append(k) join_names.append(None) else: # Then we're either Hashable or a wrong-length arraylike, # the latter of which will raise - k = cast(Hashable, k) + k = cast("Hashable", k) right_keys.append(right._get_label_or_level_values(k)) join_names.append(k) if isinstance(self.left.index, MultiIndex): @@ -1682,8 +1682,8 @@ def _maybe_coerce_merge_keys(self) -> None: # if either left or right is a categorical # then the must match exactly in categories & ordered if lk_is_cat and rk_is_cat: - lk = cast(Categorical, lk) - rk = cast(Categorical, rk) + lk = cast("Categorical", lk) + rk = cast("Categorical", rk) if lk._categories_match_up_to_permutation(rk): continue @@ -1836,11 +1836,11 @@ def _maybe_coerce_merge_keys(self) -> None: # columns, and end up trying to merge # incompatible dtypes. See GH 16900. if name in self.left.columns: - typ = cast(Categorical, lk).categories.dtype if lk_is_cat else object + typ = cast("Categorical", lk).categories.dtype if lk_is_cat else object self.left = self.left.copy() self.left[name] = self.left[name].astype(typ) if name in self.right.columns: - typ = cast(Categorical, rk).categories.dtype if rk_is_cat else object + typ = cast("Categorical", rk).categories.dtype if rk_is_cat else object self.right = self.right.copy() self.right[name] = self.right[name].astype(typ) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index c60fe71a7ff28..3bf65c599e08d 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -424,7 +424,7 @@ def _unstack_multiple( # NOTE: This doesn't deal with hierarchical columns yet index = data.index - index = cast(MultiIndex, index) # caller is responsible for checking + index = cast("MultiIndex", index) # caller is responsible for checking # GH 19966 Make sure if MultiIndexed index has tuple name, they will be # recognised as a whole @@ -796,7 +796,7 @@ def _convert_level_number(level_num: int, columns: Index): this = this.sort_index(level=level_to_sort, axis=1) mi_cols = this.columns - mi_cols = cast(MultiIndex, mi_cols) + mi_cols = cast("MultiIndex", mi_cols) new_columns = _stack_multi_column_index(mi_cols) # time to ravel the values diff --git a/pandas/core/series.py b/pandas/core/series.py index 03a2ce85a08c9..171a76638f7da 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2878,7 +2878,7 @@ def autocorr(self, lag: int = 1) -> float: >>> s.autocorr() nan """ - return self.corr(cast(Series, self.shift(lag))) + return self.corr(cast("Series", self.shift(lag))) def dot(self, other: AnyArrayLike | DataFrame) -> Series | np.ndarray: """ @@ -3547,7 +3547,7 @@ def sort_values( self._get_axis_number(axis) if is_list_like(ascending): - ascending = cast(Sequence[bool], ascending) + ascending = cast("Sequence[bool]", ascending) if len(ascending) != 1: raise ValueError( f"Length of ascending ({len(ascending)}) must be 1 for Series" @@ -3561,7 +3561,7 @@ def sort_values( # GH 35922. Make sorting stable by leveraging nargsort if key: - values_to_sort = cast(Series, ensure_key_mapped(self, key))._values + values_to_sort = cast("Series", ensure_key_mapped(self, key))._values else: values_to_sort = self._values sorted_index = nargsort(values_to_sort, kind, bool(ascending), na_position) @@ -5932,7 +5932,7 @@ def _binop(self, other: Series, func, level=None, fill_value=None) -> Series: name = ops.get_op_result_name(self, other) out = this._construct_result(result, name, other) - return cast(Series, out) + return cast("Series", out) def _construct_result( self, diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 0d8f42694ccb4..2d793d71e1e2f 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -112,7 +112,7 @@ def get_indexer_indexer( indexer = nargsort( target, kind=kind, - ascending=cast(bool, ascending), + ascending=cast("bool", ascending), na_position=na_position, ) return indexer @@ -344,7 +344,7 @@ def lexsort_indexer( for k, order in zip(reversed(keys), orders): k = ensure_key_mapped(k, key) if codes_given: - codes = cast(np.ndarray, k) + codes = cast("np.ndarray", k) n = codes.max() + 1 if len(codes) else 0 else: cat = Categorical(k, ordered=True) diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index 81f7441846589..32176764ae693 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -144,7 +144,7 @@ def wrapper(self, *args, **kwargs): return func(self, *args, **kwargs) wrapper.__name__ = func_name - return cast(F, wrapper) + return cast("F", wrapper) return _forbid_nonstring_types diff --git a/pandas/core/strings/object_array.py b/pandas/core/strings/object_array.py index 0adb7b51cf2b7..df89c38fc6f02 100644 --- a/pandas/core/strings/object_array.py +++ b/pandas/core/strings/object_array.py @@ -216,7 +216,7 @@ def _str_replace( def _str_repeat(self, repeats: int | Sequence[int]): if lib.is_integer(repeats): - rint = cast(int, repeats) + rint = cast("int", repeats) def scalar_rep(x): try: diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 0a10001a3113f..1aa23aae8c5c6 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -373,7 +373,7 @@ def _convert_listlike_datetimes( if utc: # pyarrow uses UTC, not lowercase utc if isinstance(arg, Index): - arg_array = cast(ArrowExtensionArray, arg.array) + arg_array = cast("ArrowExtensionArray", arg.array) if arg_dtype.pyarrow_dtype.tz is not None: arg_array = arg_array._dt_tz_convert("UTC") else: @@ -1034,7 +1034,7 @@ def to_datetime( # ndarray[Any, Any], Series]"; expected "Union[List[Any], Tuple[Any, ...], # Union[Union[ExtensionArray, ndarray[Any, Any]], Index, Series], Series]" argc = cast( - Union[list, tuple, ExtensionArray, np.ndarray, "Series", Index], arg + "Union[list, tuple, ExtensionArray, np.ndarray, Series, Index]", arg ) cache_array = _maybe_cache(argc, format, cache, convert_listlike) except OutOfBoundsDatetime: diff --git a/pandas/core/window/common.py b/pandas/core/window/common.py index 004a3555f0212..44a19de33f280 100644 --- a/pandas/core/window/common.py +++ b/pandas/core/window/common.py @@ -85,7 +85,7 @@ def dataframe_from_int_dict(data, frame_template) -> DataFrame: if arg2.columns.nlevels > 1: # mypy needs to know columns is a MultiIndex, Index doesn't # have levels attribute - arg2.columns = cast(MultiIndex, arg2.columns) + arg2.columns = cast("MultiIndex", arg2.columns) # GH 21157: Equivalent to MultiIndex.from_product( # [result_index], , # ) diff --git a/pandas/io/common.py b/pandas/io/common.py index 1a9e6b472463d..1beee02e54136 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -203,7 +203,7 @@ def validate_header_arg(header: object) -> None: if header is None: return if is_integer(header): - header = cast(int, header) + header = cast("int", header) if header < 0: # GH 27779 raise ValueError( @@ -212,7 +212,7 @@ def validate_header_arg(header: object) -> None: ) return if is_list_like(header, allow_sets=False): - header = cast(Sequence, header) + header = cast("Sequence", header) if not all(map(is_integer, header)): raise ValueError("header must be integer or list of integers") if any(i < 0 for i in header): @@ -267,7 +267,7 @@ def stringify_path( # GH 38125: some fsspec objects implement os.PathLike but have already opened a # file. This prevents opening the file a second time. infer_compression calls # this function with convert_file_like=True to infer the compression. - return cast(BaseBufferT, filepath_or_buffer) + return cast("BaseBufferT", filepath_or_buffer) if isinstance(filepath_or_buffer, os.PathLike): filepath_or_buffer = filepath_or_buffer.__fspath__() @@ -1137,7 +1137,7 @@ def _maybe_memory_map( return handle, memory_map, handles # mmap used by only read_csv - handle = cast(ReadCsvBuffer, handle) + handle = cast("ReadCsvBuffer", handle) # need to open the file first if isinstance(handle, str): diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index ebcafce8f4de2..75081b0ace143 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -664,28 +664,28 @@ def _calc_rows( if header is None: header_rows = 1 elif is_integer(header): - header = cast(int, header) + header = cast("int", header) header_rows = 1 + header else: - header = cast(Sequence, header) + header = cast("Sequence", header) header_rows = 1 + header[-1] # If there is a MultiIndex header and an index then there is also # a row containing just the index name(s) if is_list_like(header) and index_col is not None: - header = cast(Sequence, header) + header = cast("Sequence", header) if len(header) > 1: header_rows += 1 if skiprows is None: return header_rows + nrows if is_integer(skiprows): - skiprows = cast(int, skiprows) + skiprows = cast("int", skiprows) return header_rows + nrows + skiprows if is_list_like(skiprows): def f(skiprows: Sequence, x: int) -> bool: return x in skiprows - skiprows = cast(Sequence, skiprows) + skiprows = cast("Sequence", skiprows) return self._check_skiprows_func(partial(f, skiprows), header_rows + nrows) if callable(skiprows): return self._check_skiprows_func( @@ -738,7 +738,7 @@ def parse( sheets = [sheet_name] # handle same-type duplicates. - sheets = cast(Union[list[int], list[str]], list(dict.fromkeys(sheets).keys())) + sheets = cast("Union[list[int], list[str]]", list(dict.fromkeys(sheets).keys())) output = {} @@ -829,7 +829,7 @@ def _parse_sheet( is_len_one_list_header = True if is_len_one_list_header: - header = cast(Sequence[int], header)[0] + header = cast("Sequence[int]", header)[0] # forward fill and pull out names for MultiIndex column header_names = None @@ -1274,7 +1274,7 @@ def __init__( # cast ExcelWriter to avoid adding 'if self._handles is not None' self._handles = IOHandles( - cast(IO[bytes], path), compression={"compression": None} + cast("IO[bytes]", path), compression={"compression": None} ) if not isinstance(path, ExcelWriter): self._handles = get_handle( diff --git a/pandas/io/excel/_odfreader.py b/pandas/io/excel/_odfreader.py index f79417d11080d..f95adbcd2e61d 100644 --- a/pandas/io/excel/_odfreader.py +++ b/pandas/io/excel/_odfreader.py @@ -212,7 +212,7 @@ def _get_cell_value(self, cell) -> Scalar | NaTType: elif cell_type == "time": stamp = pd.Timestamp(str(cell)) # cast needed here because Scalar doesn't include datetime.time - return cast(Scalar, stamp.time()) + return cast("Scalar", stamp.time()) else: self.close() raise ValueError(f"Unrecognized type {cell_type}") diff --git a/pandas/io/excel/_odswriter.py b/pandas/io/excel/_odswriter.py index ba4919c9298ed..fc266983246de 100644 --- a/pandas/io/excel/_odswriter.py +++ b/pandas/io/excel/_odswriter.py @@ -120,7 +120,7 @@ def _write_cells( self.book.spreadsheet.addElement(wks) if validate_freeze_panes(freeze_panes): - freeze_panes = cast(tuple[int, int], freeze_panes) + freeze_panes = cast("tuple[int, int]", freeze_panes) self._create_freeze_panes(sheet_name, freeze_panes) for _ in range(startrow): diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py index 3055c68a93cbc..c491972f06f96 100644 --- a/pandas/io/excel/_openpyxl.py +++ b/pandas/io/excel/_openpyxl.py @@ -479,7 +479,7 @@ def _write_cells( wks.title = sheet_name if validate_freeze_panes(freeze_panes): - freeze_panes = cast(tuple[int, int], freeze_panes) + freeze_panes = cast("tuple[int, int]", freeze_panes) wks.freeze_panes = wks.cell( row=freeze_panes[0] + 1, column=freeze_panes[1] + 1 ) diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py index 75bcb51ef4be2..0d554b6f6787d 100644 --- a/pandas/io/formats/csvs.py +++ b/pandas/io/formats/csvs.py @@ -228,7 +228,7 @@ def write_cols(self) -> SequenceNotStr[Hashable]: else: # self.cols is an ndarray derived from Index._get_values_for_csv, # so its entries are strings, i.e. hashable - return cast(SequenceNotStr[Hashable], self.cols) + return cast("SequenceNotStr[Hashable]", self.cols) @property def encoded_labels(self) -> list[Hashable]: diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py index 5fde6577e9f95..38c3f13ec9c0a 100644 --- a/pandas/io/formats/excel.py +++ b/pandas/io/formats/excel.py @@ -670,7 +670,7 @@ def _format_header_regular(self) -> Iterable[ExcelCell]: colnames = self.columns if self._has_aliases: - self.header = cast(Sequence, self.header) + self.header = cast("Sequence", self.header) if len(self.header) != len(self.columns): raise ValueError( f"Writing {len(self.columns)} cols " diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index fb799361fea67..32164d00a0c75 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -234,7 +234,7 @@ def _chk_truncate(self) -> None: is_truncated_vertically = max_rows and (len(self.series) > max_rows) series = self.series if is_truncated_vertically: - max_rows = cast(int, max_rows) + max_rows = cast("int", max_rows) if min_rows: # if min_rows is set (not None or 0), set max_rows to minimum # of both @@ -326,7 +326,7 @@ def to_string(self) -> str: if self.is_truncated_vertically: n_header_rows = 0 row_num = self.tr_row_num - row_num = cast(int, row_num) + row_num = cast("int", row_num) width = self.adj.len(fmt_values[row_num - 1]) if width > 3: dot_str = "..." @@ -566,7 +566,7 @@ def _initialize_colspace(self, col_space: ColspaceArgType | None) -> ColspaceTyp result = {} elif isinstance(col_space, (int, str)): result = {"": col_space} - result.update({column: col_space for column in self.frame.columns}) + result.update(dict.fromkeys(self.frame.columns, col_space)) elif isinstance(col_space, Mapping): for column in col_space.keys(): if column not in self.frame.columns and column != "": @@ -683,7 +683,7 @@ def _truncate_horizontally(self) -> None: *self.formatters[-col_num:], ] else: - col_num = cast(int, self.max_cols) + col_num = cast("int", self.max_cols) self.tr_frame = self.tr_frame.iloc[:, :col_num] self.tr_col_num: int = col_num @@ -701,7 +701,7 @@ def _truncate_vertically(self) -> None: _slice = np.hstack([np.arange(row_num), np.arange(_len - row_num, _len)]) self.tr_frame = self.tr_frame.iloc[_slice] else: - row_num = cast(int, self.max_rows) + row_num = cast("int", self.max_rows) self.tr_frame = self.tr_frame.iloc[:row_num, :] self.tr_row_num = row_num @@ -722,7 +722,7 @@ def _get_strcols_without_index(self) -> list[list[str]]: if is_list_like(self.header): # cast here since can't be bool if is_list_like - self.header = cast(list[str], self.header) + self.header = cast("list[str]", self.header) if len(self.header) != len(self.columns): raise ValueError( f"Writing {len(self.columns)} cols " @@ -768,7 +768,7 @@ def format_col(self, i: int) -> list[str]: def _get_formatter(self, i: str | int) -> Callable | None: if isinstance(self.formatters, (list, tuple)): if is_integer(i): - i = cast(int, i) + i = cast("int", i) return self.formatters[i] else: return None @@ -803,7 +803,7 @@ def _get_formatted_column_labels(self, frame: DataFrame) -> list[list[str]]: def _get_formatted_index(self, frame: DataFrame) -> list[str]: # Note: this is only used by to_string() and to_latex(), not by # to_html(). so safe to cast col_space here. - col_space = {k: cast(int, v) for k, v in self.col_space.items()} + col_space = {k: cast("int", v) for k, v in self.col_space.items()} index = frame.index columns = frame.columns fmt = self._get_formatter("__index__") @@ -1109,13 +1109,13 @@ def format_array( fmt_klass: type[_GenericArrayFormatter] if lib.is_np_dtype(values.dtype, "M"): fmt_klass = _Datetime64Formatter - values = cast(DatetimeArray, values) + values = cast("DatetimeArray", values) elif isinstance(values.dtype, DatetimeTZDtype): fmt_klass = _Datetime64TZFormatter - values = cast(DatetimeArray, values) + values = cast("DatetimeArray", values) elif lib.is_np_dtype(values.dtype, "m"): fmt_klass = _Timedelta64Formatter - values = cast(TimedeltaArray, values) + values = cast("TimedeltaArray", values) elif isinstance(values.dtype, ExtensionDtype): fmt_klass = _ExtensionArrayFormatter elif lib.is_np_dtype(values.dtype, "fc"): diff --git a/pandas/io/formats/html.py b/pandas/io/formats/html.py index c4884ef4ce4a9..4b34e7b45e580 100644 --- a/pandas/io/formats/html.py +++ b/pandas/io/formats/html.py @@ -66,7 +66,7 @@ def __init__( self.escape = self.fmt.escape self.show_dimensions = self.fmt.show_dimensions if border is None or border is True: - border = cast(int, get_option("display.html.border")) + border = cast("int", get_option("display.html.border")) elif not border: border = None diff --git a/pandas/io/formats/style_render.py b/pandas/io/formats/style_render.py index 482ed316c7ce4..6752c83d5169b 100644 --- a/pandas/io/formats/style_render.py +++ b/pandas/io/formats/style_render.py @@ -1225,7 +1225,7 @@ def format( data = self.data.loc[subset] if not isinstance(formatter, dict): - formatter = {col: formatter for col in data.columns} + formatter = dict.fromkeys(data.columns, formatter) cis = self.columns.get_indexer_for(data.columns) ris = self.index.get_indexer_for(data.index) @@ -1411,7 +1411,7 @@ def format_index( return self # clear the formatter / revert to default and avoid looping if not isinstance(formatter, dict): - formatter = {level: formatter for level in levels_} + formatter = dict.fromkeys(levels_, formatter) else: formatter = { obj._get_level_number(level): formatter_ @@ -1708,7 +1708,7 @@ def format_index_names( return self # clear the formatter / revert to default and avoid looping if not isinstance(formatter, dict): - formatter = {level: formatter for level in levels_} + formatter = dict.fromkeys(levels_, formatter) else: formatter = { obj._get_level_number(level): formatter_ diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py index c283f600eb971..7ab33213bc07b 100644 --- a/pandas/io/parsers/base_parser.py +++ b/pandas/io/parsers/base_parser.py @@ -715,7 +715,7 @@ def _get_empty_meta( # if dtype == None, default will be object. dtype_dict = defaultdict(lambda: dtype) else: - dtype = cast(dict, dtype) + dtype = cast("dict", dtype) dtype_dict = defaultdict( lambda: None, {columns[k] if is_integer(k) else k: v for k, v in dtype.items()}, diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py index e7b5c7f06a79a..c461422af100c 100644 --- a/pandas/io/parsers/python_parser.py +++ b/pandas/io/parsers/python_parser.py @@ -227,7 +227,7 @@ class MyDialect(csv.Dialect): self.pos += 1 line = f.readline() lines = self._check_comments([[line]])[0] - lines_str = cast(list[str], lines) + lines_str = cast("list[str]", lines) # since `line` was a string, lines will be a list containing # only a single string diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index a689cfbcb1418..98f716de06397 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -1760,7 +1760,7 @@ def info(self) -> str: if self.is_open: lkeys = sorted(self.keys()) - if len(lkeys): + if lkeys: keys = [] values = [] @@ -4540,7 +4540,7 @@ def write_data(self, chunksize: int | None, dropna: bool = False) -> None: masks.append(mask.astype("u1", copy=False)) # consolidate masks - if len(masks): + if masks: mask = masks[0] for m in masks[1:]: mask = mask & m @@ -4660,7 +4660,7 @@ def delete( groups = list(diff[diff > 1].index) # 1 group - if not len(groups): + if not groups: groups = [0] # final element @@ -5143,7 +5143,7 @@ def _maybe_convert_for_string_atom( if bvalues.dtype != object: return bvalues - bvalues = cast(np.ndarray, bvalues) + bvalues = cast("np.ndarray", bvalues) dtype_name = bvalues.dtype.name inferred_type = lib.infer_dtype(bvalues, skipna=False) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 0e0f07c0f8ff3..de82a0255fa58 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -1344,7 +1344,7 @@ def _harmonize_columns( def _sqlalchemy_type(self, col: Index | Series): dtype: DtypeArg = self.dtype or {} if is_dict_like(dtype): - dtype = cast(dict, dtype) + dtype = cast("dict", dtype) if col.name in dtype: return dtype[col.name] @@ -1901,9 +1901,9 @@ def prep_table( # Type[str], Type[float], Type[int], Type[complex], Type[bool], # Type[object]]]]"; expected type "Union[ExtensionDtype, str, # dtype[Any], Type[object]]" - dtype = {col_name: dtype for col_name in frame} # type: ignore[misc] + dtype = dict.fromkeys(frame, dtype) # type: ignore[misc] else: - dtype = cast(dict, dtype) + dtype = cast("dict", dtype) from sqlalchemy.types import TypeEngine @@ -2615,7 +2615,7 @@ def _create_table_setup(self): ] ix_cols = [cname for cname, _, is_index in column_names_and_types if is_index] - if len(ix_cols): + if ix_cols: cnames = "_".join(ix_cols) cnames_br = ",".join([escape(c) for c in ix_cols]) create_stmts.append( @@ -2633,7 +2633,7 @@ def _create_table_setup(self): def _sql_type_name(self, col): dtype: DtypeArg = self.dtype or {} if is_dict_like(dtype): - dtype = cast(dict, dtype) + dtype = cast("dict", dtype) if col.name in dtype: return dtype[col.name] @@ -2859,9 +2859,9 @@ def to_sql( # Type[str], Type[float], Type[int], Type[complex], Type[bool], # Type[object]]]]"; expected type "Union[ExtensionDtype, str, # dtype[Any], Type[object]]" - dtype = {col_name: dtype for col_name in frame} # type: ignore[misc] + dtype = dict.fromkeys(frame, dtype) # type: ignore[misc] else: - dtype = cast(dict, dtype) + dtype = cast("dict", dtype) for col, my_type in dtype.items(): if not isinstance(my_type, str): diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 34d95fb59a21c..a269c850fa6fa 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -1503,7 +1503,7 @@ def _setup_dtype(self) -> np.dtype: dtypes = [] # Convert struct data types to numpy data type for i, typ in enumerate(self._typlist): if typ in self.NUMPY_TYPE_MAP: - typ = cast(str, typ) # only strs in NUMPY_TYPE_MAP + typ = cast("str", typ) # only strs in NUMPY_TYPE_MAP dtypes.append((f"s{i}", f"{self._byteorder}{self.NUMPY_TYPE_MAP[typ]}")) else: dtypes.append((f"s{i}", f"S{typ}")) @@ -1832,13 +1832,13 @@ def _do_convert_missing(self, data: DataFrame, convert_missing: bool) -> DataFra if fmt not in self.OLD_VALID_RANGE: continue - fmt = cast(str, fmt) # only strs in OLD_VALID_RANGE + fmt = cast("str", fmt) # only strs in OLD_VALID_RANGE nmin, nmax = self.OLD_VALID_RANGE[fmt] else: if fmt not in self.VALID_RANGE: continue - fmt = cast(str, fmt) # only strs in VALID_RANGE + fmt = cast("str", fmt) # only strs in VALID_RANGE nmin, nmax = self.VALID_RANGE[fmt] series = data.iloc[:, i] diff --git a/pandas/plotting/_matplotlib/converter.py b/pandas/plotting/_matplotlib/converter.py index 4c00049075d03..17ae63a1d19d8 100644 --- a/pandas/plotting/_matplotlib/converter.py +++ b/pandas/plotting/_matplotlib/converter.py @@ -88,7 +88,7 @@ def wrapper(*args, **kwargs): with pandas_converters(): return func(*args, **kwargs) - return cast(F, wrapper) + return cast("F", wrapper) @contextlib.contextmanager diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 1035150302d2c..a380df38bbe2a 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -600,7 +600,7 @@ def _axes_and_fig(self) -> tuple[Sequence[Axes], Figure]: elif self.logy == "sym" or self.loglog == "sym": [a.set_yscale("symlog") for a in axes] - axes_seq = cast(Sequence["Axes"], axes) + axes_seq = cast("Sequence[Axes]", axes) return axes_seq, fig @property diff --git a/pandas/plotting/_matplotlib/style.py b/pandas/plotting/_matplotlib/style.py index 962f9711d9916..2c7782ccdccf9 100644 --- a/pandas/plotting/_matplotlib/style.py +++ b/pandas/plotting/_matplotlib/style.py @@ -199,10 +199,10 @@ def _get_colors_from_color( raise ValueError(f"Invalid color argument: {color}") if _is_single_color(color): - color = cast(Color, color) + color = cast("Color", color) return [color] - color = cast(Collection[Color], color) + color = cast("Collection[Color]", color) return list(_gen_list_of_colors_from_iterable(color)) diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index 2d47cd851ad10..dde1158dc7951 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -334,7 +334,7 @@ def test_apply_broadcast_scalars(float_frame): def test_apply_broadcast_scalars_axis1(float_frame): result = float_frame.apply(np.mean, axis=1, result_type="broadcast") m = float_frame.mean(axis=1) - expected = DataFrame({c: m for c in float_frame.columns}) + expected = DataFrame(dict.fromkeys(float_frame.columns, m)) tm.assert_frame_equal(result, expected) @@ -361,7 +361,7 @@ def test_apply_broadcast_lists_index(float_frame): ) m = list(range(len(float_frame.index))) expected = DataFrame( - {c: m for c in float_frame.columns}, + dict.fromkeys(float_frame.columns, m), dtype="float64", index=float_frame.index, ) diff --git a/pandas/tests/dtypes/cast/test_maybe_box_native.py b/pandas/tests/dtypes/cast/test_maybe_box_native.py index 3f62f31dac219..151586962d517 100644 --- a/pandas/tests/dtypes/cast/test_maybe_box_native.py +++ b/pandas/tests/dtypes/cast/test_maybe_box_native.py @@ -17,7 +17,7 @@ "obj,expected_dtype", [ (b"\x00\x10", bytes), - (int(4), int), + ((4), int), (np.uint(4), int), (np.int32(-4), int), (np.uint8(4), int), diff --git a/pandas/tests/extension/date/array.py b/pandas/tests/extension/date/array.py index 0c51570189a7c..d2459075b6d82 100644 --- a/pandas/tests/extension/date/array.py +++ b/pandas/tests/extension/date/array.py @@ -97,7 +97,7 @@ def __init__( # check if all elements have the same type if any(not isinstance(x, np.ndarray) for x in dates): raise TypeError("invalid type") - ly, lm, ld = (len(cast(np.ndarray, d)) for d in dates) + ly, lm, ld = (len(cast("np.ndarray", d)) for d in dates) if not ly == lm == ld: raise ValueError( f"tuple members must have the same length: {(ly, lm, ld)}" diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py index 25129111180d6..00408bb746ab0 100644 --- a/pandas/tests/extension/test_string.py +++ b/pandas/tests/extension/test_string.py @@ -199,7 +199,7 @@ def _supports_accumulation(self, ser: pd.Series, op_name: str) -> bool: return op_name in ["cummin", "cummax", "cumsum"] def _cast_pointwise_result(self, op_name: str, obj, other, pointwise_result): - dtype = cast(StringDtype, tm.get_dtype(obj)) + dtype = cast("StringDtype", tm.get_dtype(obj)) if op_name in ["__add__", "__radd__"]: cast_to = dtype elif dtype.na_value is np.nan: diff --git a/pandas/tests/io/parser/test_na_values.py b/pandas/tests/io/parser/test_na_values.py index 3a68d38cc0bde..213fa2c01cef4 100644 --- a/pandas/tests/io/parser/test_na_values.py +++ b/pandas/tests/io/parser/test_na_values.py @@ -546,7 +546,7 @@ def test_na_values_dict_null_column_name(all_parsers): parser = all_parsers data = ",x,y\n\nMA,1,2\nNA,2,1\nOA,,3" names = [None, "x", "y"] - na_values = {name: STR_NA_VALUES for name in names} + na_values = dict.fromkeys(names, STR_NA_VALUES) dtype = {None: "object", "x": "float64", "y": "float64"} if parser.engine == "pyarrow": diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 5f4a100e7ccc7..f82451a2be84d 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -632,7 +632,7 @@ def test_constructor_maskedarray_hardened(self): def test_series_ctor_plus_datetimeindex(self): rng = date_range("20090415", "20090519", freq="B") - data = {k: 1 for k in rng} + data = dict.fromkeys(rng, 1) result = Series(data, index=rng) assert result.index.is_(rng) diff --git a/pandas/util/_decorators.py b/pandas/util/_decorators.py index a1a0d51a7c72b..944cc467c4e89 100644 --- a/pandas/util/_decorators.py +++ b/pandas/util/_decorators.py @@ -211,7 +211,7 @@ def wrapper(*args, **kwargs) -> Callable[..., Any]: kwargs[new_arg_name] = new_arg_value return func(*args, **kwargs) - return cast(F, wrapper) + return cast("F", wrapper) return _deprecate_kwarg