diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index a312fdc6cda22..5256bdc988995 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -1173,15 +1173,15 @@ cdef class Seen: or self.nat_) -cdef object _try_infer_map(object v): +cdef object _try_infer_map(object dtype): """ If its in our map, just return the dtype. """ cdef: object val str attr - for attr in ['name', 'kind', 'base']: - val = getattr(v.dtype, attr) + for attr in ["name", "kind", "base"]: + val = getattr(dtype, attr) if val in _TYPE_MAP: return _TYPE_MAP[val] return None @@ -1294,44 +1294,49 @@ def infer_dtype(value: object, skipna: bool = True) -> str: if util.is_array(value): values = value - elif hasattr(value, 'dtype'): + elif hasattr(value, "inferred_type") and skipna is False: + # Index, use the cached attribute if possible, populate the cache otherwise + return value.inferred_type + elif hasattr(value, "dtype"): # this will handle ndarray-like # e.g. categoricals - try: - values = getattr(value, '_values', getattr(value, 'values', value)) - except TypeError: - # This gets hit if we have an EA, since cython expects `values` - # to be an ndarray - value = _try_infer_map(value) + dtype = value.dtype + if not isinstance(dtype, np.dtype): + value = _try_infer_map(value.dtype) if value is not None: return value - # its ndarray like but we can't handle + # its ndarray-like but we can't handle raise ValueError(f"cannot infer type for {type(value)}") + # Unwrap Series/Index + values = np.asarray(value) + else: if not isinstance(value, list): value = list(value) + from pandas.core.dtypes.cast import ( construct_1d_object_array_from_listlike) values = construct_1d_object_array_from_listlike(value) # make contiguous - values = values.ravel() + # for f-contiguous array 1000 x 1000, passing order="K" gives 5000x speedup + values = values.ravel(order="K") - val = _try_infer_map(values) + val = _try_infer_map(values.dtype) if val is not None: return val if values.dtype != np.object_: - values = values.astype('O') + values = values.astype("O") if skipna: values = values[~isnaobj(values)] n = len(values) if n == 0: - return 'empty' + return "empty" # try to use a valid value for i in range(n): diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 69e9b77633b56..4bc5599297066 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1944,7 +1944,7 @@ def inferred_type(self) -> str_t: """ Return a string of the type inferred from the values. """ - return lib.infer_dtype(self, skipna=False) + return lib.infer_dtype(self._values, skipna=False) @cache_readonly def is_all_dates(self) -> bool: