From c9e81760e809937461c405e345704636b143f353 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 13 Apr 2020 18:27:43 -0700 Subject: [PATCH 1/2] REF: use cached inferred_type when calling lib.infer_dtype(index) --- pandas/_libs/lib.pyx | 44 +++++++++++++++++++++---------------- pandas/core/indexes/base.py | 2 +- 2 files changed, 26 insertions(+), 20 deletions(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 276c2d5198831..4db8b98945f91 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -1173,14 +1173,15 @@ cdef class Seen: or self.nat_) -cdef _try_infer_map(v): +cdef _try_infer_map(dtype): """ If its in our map, just return the dtype. """ cdef: - object attr, val - for attr in ['name', 'kind', 'base']: - val = getattr(v.dtype, attr) + str attr + object val + for attr in ["name", "kind", "base"]: + val = getattr(dtype, attr) if val in _TYPE_MAP: return _TYPE_MAP[val] return None @@ -1293,44 +1294,49 @@ def infer_dtype(value: object, skipna: bool = True) -> str: if util.is_array(value): values = value - elif hasattr(value, 'dtype'): + elif hasattr(value, "inferred_type") and skipna is False: + # Index, use the cached attribute if possible, populate the cache otherwise + return value.inferred_type + elif hasattr(value, "dtype"): # this will handle ndarray-like # e.g. categoricals - try: - values = getattr(value, '_values', getattr(value, 'values', value)) - except TypeError: - # This gets hit if we have an EA, since cython expects `values` - # to be an ndarray - value = _try_infer_map(value) + dtype = value.dtype + if not isinstance(dtype, np.dtype): + value = _try_infer_map(value.dtype) if value is not None: return value - # its ndarray like but we can't handle + # its ndarray-like but we can't handle raise ValueError(f"cannot infer type for {type(value)}") + # Unwrap Series/Index + values = np.asarray(value) + else: if not isinstance(value, list): value = list(value) - from pandas.core.dtypes.cast import ( - construct_1d_object_array_from_listlike) - values = construct_1d_object_array_from_listlike(value) + + # See also: construct_1d_object_array_from_listlike + values = np.empty(len(value), dtype=object) + values[:] = value # make contiguous - values = values.ravel() + # for f-contiguous array 1000 x 1000, passing order="K" gives 5000x speedup + values = values.ravel(order="K") - val = _try_infer_map(values) + val = _try_infer_map(values.dtype) if val is not None: return val if values.dtype != np.object_: - values = values.astype('O') + values = values.astype("O") if skipna: values = values[~isnaobj(values)] n = len(values) if n == 0: - return 'empty' + return "empty" # try to use a valid value for i in range(n): diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index df58593bc930c..5a75e2d2214b0 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1975,7 +1975,7 @@ def inferred_type(self) -> str_t: """ Return a string of the type inferred from the values. """ - return lib.infer_dtype(self, skipna=False) + return lib.infer_dtype(self._values, skipna=False) @cache_readonly def is_all_dates(self) -> bool: From 9327fd46077282d7b6b9329e7c9610b0d1c47e57 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 14 Apr 2020 18:46:22 -0700 Subject: [PATCH 2/2] revert --- pandas/_libs/lib.pyx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 579f440f1e3e3..bbcd3363efed1 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -1313,9 +1313,9 @@ def infer_dtype(value: object, skipna: bool = True) -> str: if not isinstance(value, list): value = list(value) - # See also: construct_1d_object_array_from_listlike - values = np.empty(len(value), dtype=object) - values[:] = value + from pandas.core.dtypes.cast import ( + construct_1d_object_array_from_listlike) + values = construct_1d_object_array_from_listlike(value) # make contiguous # for f-contiguous array 1000 x 1000, passing order="K" gives 5000x speedup