From e7d15f018f75990b7d6671e490454e9e73b84fd3 Mon Sep 17 00:00:00 2001 From: Quang Nguyen Date: Fri, 17 Nov 2023 11:39:30 +0700 Subject: [PATCH 1/8] add tests --- .../timestamp/methods/test_tz_localize.py | 38 +++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/pandas/tests/scalar/timestamp/methods/test_tz_localize.py b/pandas/tests/scalar/timestamp/methods/test_tz_localize.py index 247a583bc38f3..9df0a023730de 100644 --- a/pandas/tests/scalar/timestamp/methods/test_tz_localize.py +++ b/pandas/tests/scalar/timestamp/methods/test_tz_localize.py @@ -123,6 +123,44 @@ def test_tz_localize_nonexistent(self, stamp, tz): ts.tz_localize(tz, nonexistent="raise") assert ts.tz_localize(tz, nonexistent="NaT") is NaT + @pytest.mark.parametrize( + "stamp, tz, forward_expected, backward_expected", + [ + ( + "2015-03-29 02:00:00", + "Europe/Warsaw", + "2015-03-29 03:00:00", + "2015-03-29 01:59:59", + ), # utc+1 -> utc+2 + ( + "2023-03-12 02:00:00", + "America/Los_Angeles", + "2023-03-12 03:00:00", + "2023-03-12 01:59:59", + ), # utc-8 -> utc-7 + ( + "2023-03-26 01:00:00", + "Europe/London", + "2023-03-26 02:00:00", + "2023-03-26 00:59:59", + ), # utc+0 -> utc+1 + ( + "2023-03-26 00:00:00", + "Atlantic/Azores", + "2023-03-26 01:00:00", + "2023-03-25 23:59:59", + ), # utc-1 -> utc+0 + ], + ) + def test_tz_localize_nonexistent_shift( + self, stamp, tz, forward_expected, backward_expected + ): + ts = Timestamp(stamp) + forward_ts = ts.tz_localize(tz, nonexistent="shift_forward") + assert forward_ts == Timestamp(forward_expected, tz=tz) + backward_ts = ts.tz_localize(tz, nonexistent="shift_backward") + assert backward_ts == Timestamp(backward_expected, tz=tz) + def test_tz_localize_ambiguous_raise(self): # GH#13057 ts = Timestamp("2015-11-1 01:00") From 8c1e35af8bfd63a2d17aefe3ad4d4b055681295c Mon Sep 17 00:00:00 2001 From: Quang Nguyen Date: Fri, 17 Nov 2023 11:42:43 +0700 Subject: [PATCH 2/8] fix --- pandas/_libs/tslibs/tzconversion.pyx | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx index e77a385113e93..20ba679b7ec94 100644 --- a/pandas/_libs/tslibs/tzconversion.pyx +++ b/pandas/_libs/tslibs/tzconversion.pyx @@ -416,6 +416,9 @@ timedelta-like} else: delta_idx = bisect_right_i8(info.tdata, new_local, info.ntrans) + if (shift_forward or shift_delta > 0) and \ + info.deltas[delta_idx-1] >= 0: + delta_idx_offset = 1 delta_idx = delta_idx - delta_idx_offset result[i] = new_local - info.deltas[delta_idx] From 41d096b7c6d82831a87eedfe6e4e0d7b687be50f Mon Sep 17 00:00:00 2001 From: Quang Nguyen Date: Fri, 17 Nov 2023 13:33:19 +0700 Subject: [PATCH 3/8] add whatsnew --- doc/source/whatsnew/v2.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index efa4a52993a90..1c2a37cb829b9 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -360,7 +360,7 @@ Timezones ^^^^^^^^^ - Bug in :class:`AbstractHolidayCalendar` where timezone data was not propagated when computing holiday observances (:issue:`54580`) - Bug in :class:`Timestamp` construction with an ambiguous value and a ``pytz`` timezone failing to raise ``pytz.AmbiguousTimeError`` (:issue:`55657`) -- +- Bug in :meth:`Timestamp.tz_localize` with ``nonexistent="shift_forward`` around UTC+0 during DST (:issue:`51501`) Numeric ^^^^^^^ From 4482dd8ff2d147141f179f2863dfc7b893adb93c Mon Sep 17 00:00:00 2001 From: Quang Nguyen Date: Sat, 18 Nov 2023 10:18:53 +0700 Subject: [PATCH 4/8] refactor --- pandas/_libs/tslibs/tzconversion.pyx | 36 +++++++++++++--------------- 1 file changed, 16 insertions(+), 20 deletions(-) diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx index 20ba679b7ec94..8a328acef84ac 100644 --- a/pandas/_libs/tslibs/tzconversion.pyx +++ b/pandas/_libs/tslibs/tzconversion.pyx @@ -237,7 +237,7 @@ timedelta-like} Py_ssize_t i, n = vals.shape[0] Py_ssize_t delta_idx_offset, delta_idx int64_t v, left, right, val, new_local, remaining_mins - int64_t first_delta, delta + int64_t delta int64_t shift_delta = 0 ndarray[int64_t] result_a, result_b, dst_hours int64_t[::1] result @@ -327,22 +327,6 @@ timedelta-like} if infer_dst: dst_hours = _get_dst_hours(vals, result_a, result_b, creso=creso) - # Pre-compute delta_idx_offset that will be used if we go down non-existent - # paths. - # Shift the delta_idx by if the UTC offset of - # the target tz is greater than 0 and we're moving forward - # or vice versa - # TODO: delta_idx_offset and info.deltas are needed for zoneinfo timezones, - # but are not applicable for all timezones. Setting the former to 0 and - # length checking the latter avoids UB, but this could use a larger refactor - delta_idx_offset = 0 - if len(info.deltas): - first_delta = info.deltas[0] - if (shift_forward or shift_delta > 0) and first_delta > 0: - delta_idx_offset = 1 - elif (shift_backward or shift_delta < 0) and first_delta < 0: - delta_idx_offset = 1 - for i in range(n): val = vals[i] left = result_a[i] @@ -416,9 +400,21 @@ timedelta-like} else: delta_idx = bisect_right_i8(info.tdata, new_local, info.ntrans) - if (shift_forward or shift_delta > 0) and \ - info.deltas[delta_idx-1] >= 0: - delta_idx_offset = 1 + + # Shift the delta_idx by if the UTC offset of + # the target tz is greater than 0 and we're moving forward + # or vice versa + # TODO: delta_idx_offset and info.deltas are needed for zoneinfo + # timezones, but are not applicable for all timezones. Setting the + # former to 0 and length checking the latter avoids UB, but this + # could use a larger refactor + delta_idx_offset = 0 + if len(info.deltas): + delta = info.deltas[delta_idx-1] + if (shift_forward or shift_delta > 0) and delta >= 0: + delta_idx_offset = 1 + elif (shift_backward or shift_delta < 0) and delta < 0: + delta_idx_offset = 1 delta_idx = delta_idx - delta_idx_offset result[i] = new_local - info.deltas[delta_idx] From 65dbc2b7b6c2a8d1a110c8b76ee02fedd3b4edee Mon Sep 17 00:00:00 2001 From: Quang Nguyen Date: Sat, 18 Nov 2023 11:28:22 +0700 Subject: [PATCH 5/8] Revert "refactor" This reverts commit 4482dd8ff2d147141f179f2863dfc7b893adb93c. --- pandas/_libs/tslibs/tzconversion.pyx | 36 +++++++++++++++------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx index 8a328acef84ac..20ba679b7ec94 100644 --- a/pandas/_libs/tslibs/tzconversion.pyx +++ b/pandas/_libs/tslibs/tzconversion.pyx @@ -237,7 +237,7 @@ timedelta-like} Py_ssize_t i, n = vals.shape[0] Py_ssize_t delta_idx_offset, delta_idx int64_t v, left, right, val, new_local, remaining_mins - int64_t delta + int64_t first_delta, delta int64_t shift_delta = 0 ndarray[int64_t] result_a, result_b, dst_hours int64_t[::1] result @@ -327,6 +327,22 @@ timedelta-like} if infer_dst: dst_hours = _get_dst_hours(vals, result_a, result_b, creso=creso) + # Pre-compute delta_idx_offset that will be used if we go down non-existent + # paths. + # Shift the delta_idx by if the UTC offset of + # the target tz is greater than 0 and we're moving forward + # or vice versa + # TODO: delta_idx_offset and info.deltas are needed for zoneinfo timezones, + # but are not applicable for all timezones. Setting the former to 0 and + # length checking the latter avoids UB, but this could use a larger refactor + delta_idx_offset = 0 + if len(info.deltas): + first_delta = info.deltas[0] + if (shift_forward or shift_delta > 0) and first_delta > 0: + delta_idx_offset = 1 + elif (shift_backward or shift_delta < 0) and first_delta < 0: + delta_idx_offset = 1 + for i in range(n): val = vals[i] left = result_a[i] @@ -400,21 +416,9 @@ timedelta-like} else: delta_idx = bisect_right_i8(info.tdata, new_local, info.ntrans) - - # Shift the delta_idx by if the UTC offset of - # the target tz is greater than 0 and we're moving forward - # or vice versa - # TODO: delta_idx_offset and info.deltas are needed for zoneinfo - # timezones, but are not applicable for all timezones. Setting the - # former to 0 and length checking the latter avoids UB, but this - # could use a larger refactor - delta_idx_offset = 0 - if len(info.deltas): - delta = info.deltas[delta_idx-1] - if (shift_forward or shift_delta > 0) and delta >= 0: - delta_idx_offset = 1 - elif (shift_backward or shift_delta < 0) and delta < 0: - delta_idx_offset = 1 + if (shift_forward or shift_delta > 0) and \ + info.deltas[delta_idx-1] >= 0: + delta_idx_offset = 1 delta_idx = delta_idx - delta_idx_offset result[i] = new_local - info.deltas[delta_idx] From 1f8946111618fd0e4847757aebacd6b1ab198319 Mon Sep 17 00:00:00 2001 From: Quang Nguyen Date: Sat, 18 Nov 2023 11:39:46 +0700 Subject: [PATCH 6/8] fix bug --- pandas/_libs/tslibs/tzconversion.pyx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx index 20ba679b7ec94..fec3eb25aaf1f 100644 --- a/pandas/_libs/tslibs/tzconversion.pyx +++ b/pandas/_libs/tslibs/tzconversion.pyx @@ -418,9 +418,9 @@ timedelta-like} delta_idx = bisect_right_i8(info.tdata, new_local, info.ntrans) if (shift_forward or shift_delta > 0) and \ info.deltas[delta_idx-1] >= 0: - delta_idx_offset = 1 - - delta_idx = delta_idx - delta_idx_offset + delta_idx = delta_idx - 1 + else: + delta_idx = delta_idx - delta_idx_offset result[i] = new_local - info.deltas[delta_idx] elif fill_nonexist: result[i] = NPY_NAT From 9a6cd6693c408da5c65d377a5eda8874aad234b9 Mon Sep 17 00:00:00 2001 From: Quang Nguyen Date: Tue, 21 Nov 2023 13:56:12 +0700 Subject: [PATCH 7/8] update --- pandas/_libs/tslibs/tzconversion.pyx | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx index fec3eb25aaf1f..45dc74567941c 100644 --- a/pandas/_libs/tslibs/tzconversion.pyx +++ b/pandas/_libs/tslibs/tzconversion.pyx @@ -416,8 +416,10 @@ timedelta-like} else: delta_idx = bisect_right_i8(info.tdata, new_local, info.ntrans) + # Logic similar to the precompute portion right before this for-loop + # But check the current delta in case we are moving in/out of UTC+0 if (shift_forward or shift_delta > 0) and \ - info.deltas[delta_idx-1] >= 0: + info.deltas[delta_idx - 1] >= 0: delta_idx = delta_idx - 1 else: delta_idx = delta_idx - delta_idx_offset From 6accd916263c98315b15f3c9c6bffa1ddb29cc63 Mon Sep 17 00:00:00 2001 From: Quang Nguyen Date: Tue, 21 Nov 2023 13:59:04 +0700 Subject: [PATCH 8/8] more cmt --- pandas/_libs/tslibs/tzconversion.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx index 45dc74567941c..2c4f0cd14db13 100644 --- a/pandas/_libs/tslibs/tzconversion.pyx +++ b/pandas/_libs/tslibs/tzconversion.pyx @@ -416,8 +416,8 @@ timedelta-like} else: delta_idx = bisect_right_i8(info.tdata, new_local, info.ntrans) - # Logic similar to the precompute portion right before this for-loop - # But check the current delta in case we are moving in/out of UTC+0 + # Logic similar to the precompute section. But check the current + # delta in case we are moving between UTC+0 and non-zero timezone if (shift_forward or shift_delta > 0) and \ info.deltas[delta_idx - 1] >= 0: delta_idx = delta_idx - 1