Skip to content

REF: use cached inferred_type when calling lib.infer_dtype(index) #33537

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Apr 25, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 20 additions & 15 deletions pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1173,15 +1173,15 @@ cdef class Seen:
or self.nat_)


cdef object _try_infer_map(object v):
cdef object _try_infer_map(object dtype):
"""
If the dtype's name, kind, or base is in our map, return the mapped value.
"""
cdef:
object val
str attr
for attr in ['name', 'kind', 'base']:
val = getattr(v.dtype, attr)
for attr in ["name", "kind", "base"]:
val = getattr(dtype, attr)
if val in _TYPE_MAP:
return _TYPE_MAP[val]
return None
Expand Down Expand Up @@ -1294,44 +1294,49 @@ def infer_dtype(value: object, skipna: bool = True) -> str:

if util.is_array(value):
values = value
elif hasattr(value, 'dtype'):
elif hasattr(value, "inferred_type") and skipna is False:
# Index, use the cached attribute if possible, populate the cache otherwise
return value.inferred_type
elif hasattr(value, "dtype"):
# this will handle ndarray-like
# e.g. categoricals
try:
values = getattr(value, '_values', getattr(value, 'values', value))
except TypeError:
# This gets hit if we have an EA, since cython expects `values`
# to be an ndarray
value = _try_infer_map(value)
dtype = value.dtype
if not isinstance(dtype, np.dtype):
value = _try_infer_map(value.dtype)
if value is not None:
return value

# its ndarray like but we can't handle
# it's ndarray-like but we can't handle it
raise ValueError(f"cannot infer type for {type(value)}")

# Unwrap Series/Index
values = np.asarray(value)

else:
if not isinstance(value, list):
value = list(value)

from pandas.core.dtypes.cast import (
construct_1d_object_array_from_listlike)
values = construct_1d_object_array_from_listlike(value)

# make contiguous
values = values.ravel()
# for f-contiguous array 1000 x 1000, passing order="K" gives 5000x speedup
values = values.ravel(order="K")

val = _try_infer_map(values)
val = _try_infer_map(values.dtype)
if val is not None:
return val

if values.dtype != np.object_:
values = values.astype('O')
values = values.astype("O")

if skipna:
values = values[~isnaobj(values)]

n = len(values)
if n == 0:
return 'empty'
return "empty"

# try to use a valid value
for i in range(n):
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1944,7 +1944,7 @@ def inferred_type(self) -> str_t:
"""
Return a string of the type inferred from the values.
"""
return lib.infer_dtype(self, skipna=False)
return lib.infer_dtype(self._values, skipna=False)

@cache_readonly
def is_all_dates(self) -> bool:
Expand Down