From 1172bef9d2d07b8c4b2d882a0001d659e4733acf Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 13 Aug 2018 18:42:08 -0700 Subject: [PATCH 1/3] de-duplicate index validation code --- pandas/_libs/index.pyx | 16 ++---------- pandas/_libs/util.pxd | 55 +++++++++++++++++++++++++++--------------- 2 files changed, 38 insertions(+), 33 deletions(-) diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index 5918560cf1436..0161d51601816 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -51,21 +51,9 @@ cpdef get_value_at(ndarray arr, object loc, object tz=None): cpdef object get_value_box(ndarray arr, object loc): cdef: - Py_ssize_t i, sz - - if util.is_float_object(loc): - casted = int(loc) - if casted == loc: - loc = casted - i = loc - sz = cnp.PyArray_SIZE(arr) - - if i < 0 and sz > 0: - i += sz - - if i >= sz or sz == 0 or i < 0: - raise IndexError('index out of bounds') + Py_ssize_t i + i = util.validate_indexer(arr, loc) return get_value_at(arr, i, tz=None) diff --git a/pandas/_libs/util.pxd b/pandas/_libs/util.pxd index 31843a755e7b1..9e3227645c042 100644 --- a/pandas/_libs/util.pxd +++ b/pandas/_libs/util.pxd @@ -44,23 +44,50 @@ ctypedef fused numeric: cnp.float64_t -cdef inline object get_value_at(ndarray arr, object loc): +cdef inline Py_ssize_t validate_indexer(ndarray arr, object loc) except? -1: + """ + Cast the given indexer `loc` to an integer. If it is negative, i.e. a + python-style indexing-from-the-end indexer, translate it to a + from-the-front indexer. Raise if this is not possible. + + Parameters + ---------- + arr : ndarray + loc : object + + Returns + ------- + idx : Py_ssize_t + + Raises + ------ + IndexError + """ cdef: - Py_ssize_t i, sz + Py_ssize_t idx, size int casted if is_float_object(loc): casted = int(loc) if casted == loc: loc = casted - i = loc - sz = cnp.PyArray_SIZE(arr) - if i < 0 and sz > 0: - i += sz - elif i >= sz or sz == 0: + idx = loc + size = cnp.PyArray_SIZE(arr) + + if idx < 0 and size > 0: + idx += size + if idx >= size or size == 0 or idx < 0: raise IndexError('index out of bounds') + return idx + + +cdef inline object get_value_at(ndarray arr, object loc): + cdef: + Py_ssize_t i + + i = validate_indexer(arr, loc) return get_value_1d(arr, i) @@ -71,19 +98,9 @@ cdef inline set_value_at_unsafe(ndarray arr, object loc, object value): flag above the loop and then eschew the check on each iteration. """ cdef: - Py_ssize_t i, sz - if is_float_object(loc): - casted = int(loc) - if casted == loc: - loc = casted - i = loc - sz = cnp.PyArray_SIZE(arr) - - if i < 0: - i += sz - elif i >= sz: - raise IndexError('index out of bounds') + Py_ssize_t i + i = validate_indexer(arr, loc) assign_value_1d(arr, i, value) From 92849f4f3a2977992a72a851dba5523f80702000 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 14 Aug 2018 14:12:17 -0700 Subject: [PATCH 2/3] remove question mark --- pandas/_libs/util.pxd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/util.pxd b/pandas/_libs/util.pxd index 9e3227645c042..acc4048b2169b 100644 --- a/pandas/_libs/util.pxd +++ b/pandas/_libs/util.pxd @@ -44,7 +44,7 @@ ctypedef fused numeric: cnp.float64_t -cdef inline Py_ssize_t validate_indexer(ndarray arr, object loc) except? -1: +cdef inline Py_ssize_t validate_indexer(ndarray arr, object loc) except -1: """ Cast the given indexer `loc` to an integer. If it is negative, i.e. a python-style indexing-from-the-end indexer, translate it to a From 68e0a67ad8c5599695a0f0e672301431fac99f99 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 14 Aug 2018 19:16:00 -0700 Subject: [PATCH 3/3] Per suggestion, avoid duplicated validation --- pandas/_libs/index.pyx | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index 0161d51601816..c2eb07c652400 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -50,11 +50,7 @@ cpdef get_value_at(ndarray arr, object loc, object tz=None): cpdef object get_value_box(ndarray arr, object loc): - cdef: - Py_ssize_t i - - i = util.validate_indexer(arr, loc) - return get_value_at(arr, i, tz=None) + return get_value_at(arr, loc, tz=None) # Don't populate hash tables in monotonic indexes larger than this