From cfb760cb33a33feb05b44f25adf48baad2235237 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 3 Dec 2017 16:53:15 -0800 Subject: [PATCH 1/9] handle DST appropriately in Timestamp.replace --- doc/source/whatsnew/v0.22.0.txt | 2 +- pandas/_libs/tslibs/timestamps.pyx | 15 ++++++++++++--- pandas/tests/scalar/test_timestamp.py | 20 ++++++++++++++++++++ 3 files changed, 33 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt index 304ccd1f9350b..790af26258292 100644 --- a/doc/source/whatsnew/v0.22.0.txt +++ b/doc/source/whatsnew/v0.22.0.txt @@ -248,4 +248,4 @@ Other - Fixed a bug where creating a Series from an array that contains both tz-naive and tz-aware values will result in a Series whose dtype is tz-aware instead of object (:issue:`16406`) - Fixed construction of a :class:`Series` from a ``dict`` containing ``NaN`` as key (:issue:`18480`) - Adding a ``Period`` object to a ``datetime`` or ``Timestamp`` object will now correctly raise a ``TypeError`` (:issue:`17983`) -- +- :func:`Timestamp.replace` will now handle Daylight Savings transitions gracefully (:issue:`18319`) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index cf0c0e2c01d60..db97944591797 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -32,7 +32,7 @@ from np_datetime cimport (reverse_ops, cmp_scalar, check_dts_bounds, is_leapyear) from timedeltas import Timedelta from timedeltas cimport delta_to_nanoseconds -from timezones cimport get_timezone, is_utc, maybe_get_tz +from timezones cimport get_timezone, is_utc, maybe_get_tz, treat_tz_as_pytz # ---------------------------------------------------------------------- # Constants @@ -927,8 +927,17 @@ class Timestamp(_Timestamp): _tzinfo = tzinfo # reconstruct & check bounds - ts_input = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, - dts.sec, dts.us, tzinfo=_tzinfo) + if _tzinfo is not None and treat_tz_as_pytz(_tzinfo): + # be careful about DST transition, #18319 + ts_input = _tzinfo.localize(datetime(dts.year, dts.month, dts.day, + dts.hour, dts.min, dts.sec, + dts.us)) + _tzinfo = ts_input.tzinfo + else: + ts_input = datetime(dts.year, dts.month, dts.day, + dts.hour, dts.min, dts.sec, dts.us, + tzinfo=_tzinfo) + ts = convert_datetime_to_tsobject(ts_input, _tzinfo) value = ts.value + (dts.ps // 1000) if value != NPY_NAT: diff --git a/pandas/tests/scalar/test_timestamp.py b/pandas/tests/scalar/test_timestamp.py index 9d97057569580..541dbc750737b 100644 --- a/pandas/tests/scalar/test_timestamp.py +++ b/pandas/tests/scalar/test_timestamp.py @@ -1136,6 +1136,26 @@ def test_timestamp(self): dt = ts.to_pydatetime() assert dt.timestamp() == ts.timestamp() + def test_replace(self): + # GH#18319 + tz = pytz.timezone('US/Eastern') + + ts_naive = Timestamp('2017-12-03 16:03:30') + ts_aware = tz.localize(ts_naive) + + # Preliminary sanity-check + assert ts_aware == ts_aware.tzinfo.normalize(ts_aware) + + # Replace across DST boundary + ts2 = ts_aware.replace(month=6) + + # Check that `replace` preserves hour literal + assert (ts2.hour, ts2.minute) == (ts_aware.hour, ts_aware.minute) + + # Check that post-replace object is appropriately normalized + ts2b = ts2.tzinfo.normalize(ts2) + assert ts2 == ts2b + class TestTimestampNsOperations(object): From 4ca60fd6336489f4d67222b1deef8329023583d7 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 4 Dec 2017 07:03:28 -0800 Subject: [PATCH 2/9] better comment, move test to test_timezones --- pandas/_libs/tslibs/timestamps.pyx | 3 ++- pandas/tests/scalar/test_timestamp.py | 20 -------------------- pandas/tests/tseries/test_timezones.py | 20 ++++++++++++++++++++ 3 files changed, 22 insertions(+), 21 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index db97944591797..c733702e73cdf 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -928,7 +928,8 @@ class Timestamp(_Timestamp): # reconstruct & check bounds if _tzinfo is not None and treat_tz_as_pytz(_tzinfo): - # be careful about DST transition, #18319 + # replacing across a DST boundary may induce a new tzinfo object + # see GH#18319 ts_input = _tzinfo.localize(datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us)) diff --git a/pandas/tests/scalar/test_timestamp.py b/pandas/tests/scalar/test_timestamp.py index 541dbc750737b..9d97057569580 100644 --- a/pandas/tests/scalar/test_timestamp.py +++ b/pandas/tests/scalar/test_timestamp.py @@ -1136,26 +1136,6 @@ def test_timestamp(self): dt = ts.to_pydatetime() assert dt.timestamp() == ts.timestamp() - def test_replace(self): - # GH#18319 - tz = pytz.timezone('US/Eastern') - - ts_naive = Timestamp('2017-12-03 16:03:30') - ts_aware = tz.localize(ts_naive) - - # Preliminary sanity-check - assert ts_aware == ts_aware.tzinfo.normalize(ts_aware) - - # Replace across DST boundary - ts2 = ts_aware.replace(month=6) - - # Check that `replace` preserves hour literal - assert (ts2.hour, ts2.minute) == (ts_aware.hour, ts_aware.minute) - - # Check that post-replace object is appropriately normalized - ts2b = ts2.tzinfo.normalize(ts2) - assert ts2 == ts2b - class TestTimestampNsOperations(object): diff --git a/pandas/tests/tseries/test_timezones.py b/pandas/tests/tseries/test_timezones.py index a01166daf6be1..f4a1fce261557 100644 --- a/pandas/tests/tseries/test_timezones.py +++ b/pandas/tests/tseries/test_timezones.py @@ -1229,6 +1229,26 @@ def f(): dt = Timestamp('2013-11-03 01:59:59.999999-0400', tz='US/Eastern') assert dt.tz_localize(None) == dt.replace(tzinfo=None) + def test_replace_across_dst(self): + # GH#18319 + tz = pytz.timezone('US/Eastern') + + ts_naive = Timestamp('2017-12-03 16:03:30') + ts_aware = tz.localize(ts_naive) + + # Preliminary sanity-check + assert ts_aware == ts_aware.tzinfo.normalize(ts_aware) + + # Replace across DST boundary + ts2 = ts_aware.replace(month=6) + + # Check that `replace` preserves hour literal + assert (ts2.hour, ts2.minute) == (ts_aware.hour, ts_aware.minute) + + # Check that post-replace object is appropriately normalized + ts2b = ts2.tzinfo.normalize(ts2) + assert ts2 == ts2b + def test_ambiguous_compat(self): # validate that pytz and dateutil are compat for dst # when the transition happens From f7389898a62aa0cf79b2fbc252fb884fdbb16f41 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 5 Dec 2017 07:53:24 -0800 Subject: [PATCH 3/9] informative comment in test --- pandas/tests/tseries/test_timezones.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/tseries/test_timezones.py b/pandas/tests/tseries/test_timezones.py index f4a1fce261557..383e0bf6d4b07 100644 --- a/pandas/tests/tseries/test_timezones.py +++ b/pandas/tests/tseries/test_timezones.py @@ -1230,7 +1230,8 @@ def f(): assert dt.tz_localize(None) == dt.replace(tzinfo=None) def test_replace_across_dst(self): - # GH#18319 + # GH#18319 check that 1) timezone is correctly normalized and + # 2) that hour is not incorrectly changed by this normalization tz = pytz.timezone('US/Eastern') ts_naive = Timestamp('2017-12-03 16:03:30') From 6f36a973b893401ae08cfa5328fa9179877d4027 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 3 Dec 2017 16:53:15 -0800 Subject: [PATCH 4/9] handle DST appropriately in Timestamp.replace --- doc/source/whatsnew/v0.22.0.txt | 1 + pandas/_libs/tslibs/timestamps.pyx | 15 ++++++++++++--- pandas/tests/scalar/test_timestamp.py | 20 ++++++++++++++++++++ 3 files changed, 33 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt index ae272282040b8..c5915e2813a52 100644 --- a/doc/source/whatsnew/v0.22.0.txt +++ b/doc/source/whatsnew/v0.22.0.txt @@ -264,3 +264,4 @@ Other - Fixed a bug where creating a Series from an array that contains both tz-naive and tz-aware values will result in a Series whose dtype is tz-aware instead of object (:issue:`16406`) - Fixed construction of a :class:`Series` from a ``dict`` containing ``NaN`` as key (:issue:`18480`) - Adding a ``Period`` object to a ``datetime`` or ``Timestamp`` object will now correctly raise a ``TypeError`` (:issue:`17983`) +- :func:`Timestamp.replace` will now handle Daylight Savings transitions gracefully (:issue:`18319`) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index cf0c0e2c01d60..db97944591797 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -32,7 +32,7 @@ from np_datetime cimport (reverse_ops, cmp_scalar, check_dts_bounds, is_leapyear) from timedeltas import Timedelta from timedeltas cimport delta_to_nanoseconds -from timezones cimport get_timezone, is_utc, maybe_get_tz +from timezones cimport get_timezone, is_utc, maybe_get_tz, treat_tz_as_pytz # ---------------------------------------------------------------------- # Constants @@ -927,8 +927,17 @@ class Timestamp(_Timestamp): _tzinfo = tzinfo # reconstruct & check bounds - ts_input = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, - dts.sec, dts.us, tzinfo=_tzinfo) + if _tzinfo is not None and treat_tz_as_pytz(_tzinfo): + # be careful about DST transition, #18319 + ts_input = _tzinfo.localize(datetime(dts.year, dts.month, dts.day, + dts.hour, dts.min, dts.sec, + dts.us)) + _tzinfo = ts_input.tzinfo + else: + ts_input = datetime(dts.year, dts.month, dts.day, + dts.hour, dts.min, dts.sec, dts.us, + tzinfo=_tzinfo) + ts = convert_datetime_to_tsobject(ts_input, _tzinfo) value = ts.value + (dts.ps // 1000) if value != NPY_NAT: diff --git a/pandas/tests/scalar/test_timestamp.py b/pandas/tests/scalar/test_timestamp.py index e23911e8d2003..c4ad73f1947fc 100644 --- a/pandas/tests/scalar/test_timestamp.py +++ b/pandas/tests/scalar/test_timestamp.py @@ -1173,6 +1173,26 @@ def test_timestamp_compare_with_early_datetime(self): assert stamp < datetime(2700, 1, 1) assert stamp <= datetime(2700, 1, 1) + def test_replace(self): + # GH#18319 + tz = pytz.timezone('US/Eastern') + + ts_naive = Timestamp('2017-12-03 16:03:30') + ts_aware = tz.localize(ts_naive) + + # Preliminary sanity-check + assert ts_aware == ts_aware.tzinfo.normalize(ts_aware) + + # Replace across DST boundary + ts2 = ts_aware.replace(month=6) + + # Check that `replace` preserves hour literal + assert (ts2.hour, ts2.minute) == (ts_aware.hour, ts_aware.minute) + + # Check that post-replace object is appropriately normalized + ts2b = ts2.tzinfo.normalize(ts2) + assert ts2 == ts2b + class TestTimestampNsOperations(object): From b916b175e75a6c2c3d83a80a7bce760b211f6a20 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 4 Dec 2017 07:03:28 -0800 Subject: [PATCH 5/9] better comment, move test to test_timezones --- pandas/_libs/tslibs/timestamps.pyx | 3 ++- pandas/tests/scalar/test_timestamp.py | 20 -------------------- pandas/tests/tseries/test_timezones.py | 20 ++++++++++++++++++++ 3 files changed, 22 insertions(+), 21 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index db97944591797..c733702e73cdf 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -928,7 +928,8 @@ class Timestamp(_Timestamp): # reconstruct & check bounds if _tzinfo is not None and treat_tz_as_pytz(_tzinfo): - # be careful about DST transition, #18319 + # replacing across a DST boundary may induce a new tzinfo object + # see GH#18319 ts_input = _tzinfo.localize(datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us)) diff --git a/pandas/tests/scalar/test_timestamp.py b/pandas/tests/scalar/test_timestamp.py index c4ad73f1947fc..e23911e8d2003 100644 --- a/pandas/tests/scalar/test_timestamp.py +++ b/pandas/tests/scalar/test_timestamp.py @@ -1173,26 +1173,6 @@ def test_timestamp_compare_with_early_datetime(self): assert stamp < datetime(2700, 1, 1) assert stamp <= datetime(2700, 1, 1) - def test_replace(self): - # GH#18319 - tz = pytz.timezone('US/Eastern') - - ts_naive = Timestamp('2017-12-03 16:03:30') - ts_aware = tz.localize(ts_naive) - - # Preliminary sanity-check - assert ts_aware == ts_aware.tzinfo.normalize(ts_aware) - - # Replace across DST boundary - ts2 = ts_aware.replace(month=6) - - # Check that `replace` preserves hour literal - assert (ts2.hour, ts2.minute) == (ts_aware.hour, ts_aware.minute) - - # Check that post-replace object is appropriately normalized - ts2b = ts2.tzinfo.normalize(ts2) - assert ts2 == ts2b - class TestTimestampNsOperations(object): diff --git a/pandas/tests/tseries/test_timezones.py b/pandas/tests/tseries/test_timezones.py index 5fd2089d234c1..6f4034514f2c7 100644 --- a/pandas/tests/tseries/test_timezones.py +++ b/pandas/tests/tseries/test_timezones.py @@ -1229,6 +1229,26 @@ def f(): dt = Timestamp('2013-11-03 01:59:59.999999-0400', tz='US/Eastern') assert dt.tz_localize(None) == dt.replace(tzinfo=None) + def test_replace_across_dst(self): + # GH#18319 + tz = pytz.timezone('US/Eastern') + + ts_naive = Timestamp('2017-12-03 16:03:30') + ts_aware = tz.localize(ts_naive) + + # Preliminary sanity-check + assert ts_aware == ts_aware.tzinfo.normalize(ts_aware) + + # Replace across DST boundary + ts2 = ts_aware.replace(month=6) + + # Check that `replace` preserves hour literal + assert (ts2.hour, ts2.minute) == (ts_aware.hour, ts_aware.minute) + + # Check that post-replace object is appropriately normalized + ts2b = ts2.tzinfo.normalize(ts2) + assert ts2 == ts2b + def test_ambiguous_compat(self): # validate that pytz and dateutil are compat for dst # when the transition happens From b00d763a606daf8fa43bc25e1e21c49fc88c81b3 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 5 Dec 2017 07:53:24 -0800 Subject: [PATCH 6/9] informative comment in test --- pandas/tests/tseries/test_timezones.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/tseries/test_timezones.py b/pandas/tests/tseries/test_timezones.py index 6f4034514f2c7..3efbc67ff0deb 100644 --- a/pandas/tests/tseries/test_timezones.py +++ b/pandas/tests/tseries/test_timezones.py @@ -1230,7 +1230,8 @@ def f(): assert dt.tz_localize(None) == dt.replace(tzinfo=None) def test_replace_across_dst(self): - # GH#18319 + # GH#18319 check that 1) timezone is correctly normalized and + # 2) that hour is not incorrectly changed by this normalization tz = pytz.timezone('US/Eastern') ts_naive = Timestamp('2017-12-03 16:03:30') From 4cfd949882d9a4ae0037efe26b8d35a11efde864 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 7 Dec 2017 07:53:11 -0800 Subject: [PATCH 7/9] fix duplicate whatsnews caused by merge mixups --- doc/source/whatsnew/v0.22.0.txt | 4 ---- 1 file changed, 4 deletions(-) diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt index 91cf1a4bb9b69..d2e07e190f8d7 100644 --- a/doc/source/whatsnew/v0.22.0.txt +++ b/doc/source/whatsnew/v0.22.0.txt @@ -320,10 +320,6 @@ Categorical Other ^^^^^ -- Improved error message when attempting to use a Python keyword as an identifier in a numexpr query (:issue:`18221`) -- Fixed a bug where creating a Series from an array that contains both tz-naive and tz-aware values will result in a Series whose dtype is tz-aware instead of object (:issue:`16406`) -- Fixed construction of a :class:`Series` from a ``dict`` containing ``NaN`` as key (:issue:`18480`) -- Adding a ``Period`` object to a ``datetime`` or ``Timestamp`` object will now correctly raise a ``TypeError`` (:issue:`17983`) - :func:`Timestamp.replace` will now handle Daylight Savings transitions gracefully (:issue:`18319`) - Improved error message when attempting to use a Python keyword as an identifier in a ``numexpr`` backed query (:issue:`18221`) - From 4459c9fa2be74a849cb206f1fa181419e6c9d66c Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 29 Dec 2017 11:14:38 -0800 Subject: [PATCH 8/9] fixup merge messup --- doc/source/whatsnew/v0.22.0.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt index f4cee0642953b..da4acd99e3873 100644 --- a/doc/source/whatsnew/v0.22.0.txt +++ b/doc/source/whatsnew/v0.22.0.txt @@ -215,3 +215,6 @@ returns ``0``. s = pd.Series([np.nan, np.nan]) s.rolling(2, min_periods=0).sum() + +The default behavior of ``min_periods=None``, implying that ``min_periods`` +equals the window size, is unchanged. From dced361da7f44d8c1c47be21b502ce69c0d83ae7 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 4 Jan 2018 09:44:44 -0800 Subject: [PATCH 9/9] refactor test to apply to both pytz and dateutil --- pandas/tests/tseries/test_timezones.py | 50 +++++++++++++++----------- 1 file changed, 29 insertions(+), 21 deletions(-) diff --git a/pandas/tests/tseries/test_timezones.py b/pandas/tests/tseries/test_timezones.py index 51c32632a543c..7ae63d7d080cc 100644 --- a/pandas/tests/tseries/test_timezones.py +++ b/pandas/tests/tseries/test_timezones.py @@ -61,6 +61,10 @@ def tzstr(self, tz): def localize(self, tz, x): return tz.localize(x) + def normalize(self, ts): + tzinfo = ts.tzinfo + return tzinfo.normalize(ts) + def cmptz(self, tz1, tz2): # Compare two timezones. Overridden in subclass to parameterize # tests. @@ -935,6 +939,27 @@ def test_datetimeindex_tz_nat(self): assert isna(idx[1]) assert idx[0].tzinfo is not None + def test_replace_across_dst(self): + # GH#18319 check that 1) timezone is correctly normalized and + # 2) that hour is not incorrectly changed by this normalization + tz = self.tz('US/Eastern') + + ts_naive = Timestamp('2017-12-03 16:03:30') + ts_aware = self.localize(tz, ts_naive) + + # Preliminary sanity-check + assert ts_aware == self.normalize(ts_aware) + + # Replace across DST boundary + ts2 = ts_aware.replace(month=6) + + # Check that `replace` preserves hour literal + assert (ts2.hour, ts2.minute) == (ts_aware.hour, ts_aware.minute) + + # Check that post-replace object is appropriately normalized + ts2b = self.normalize(ts2) + assert ts2 == ts2b + class TestTimeZoneSupportDateutil(TestTimeZoneSupportPytz): @@ -959,6 +984,10 @@ def cmptz(self, tz1, tz2): def localize(self, tz, x): return x.replace(tzinfo=tz) + def normalize(self, ts): + # no-op for dateutil + return ts + @td.skip_if_windows def test_utc_with_system_utc(self): from pandas._libs.tslibs.timezones import maybe_get_tz @@ -1228,27 +1257,6 @@ def f(): dt = Timestamp('2013-11-03 01:59:59.999999-0400', tz='US/Eastern') assert dt.tz_localize(None) == dt.replace(tzinfo=None) - def test_replace_across_dst(self): - # GH#18319 check that 1) timezone is correctly normalized and - # 2) that hour is not incorrectly changed by this normalization - tz = pytz.timezone('US/Eastern') - - ts_naive = Timestamp('2017-12-03 16:03:30') - ts_aware = tz.localize(ts_naive) - - # Preliminary sanity-check - assert ts_aware == ts_aware.tzinfo.normalize(ts_aware) - - # Replace across DST boundary - ts2 = ts_aware.replace(month=6) - - # Check that `replace` preserves hour literal - assert (ts2.hour, ts2.minute) == (ts_aware.hour, ts_aware.minute) - - # Check that post-replace object is appropriately normalized - ts2b = ts2.tzinfo.normalize(ts2) - assert ts2 == ts2b - def test_ambiguous_compat(self): # validate that pytz and dateutil are compat for dst # when the transition happens