Skip to content

Commit 45fd0e6

Browse files
authored
Numpy 1.18 support (#3537)
* Closes #3409 * Unpin versions * Rewrite unit test for clarity about its real scope * mean() on dask * Trivial * duck_array_ops should never receive xarray.Variable
1 parent 980a1d2 commit 45fd0e6

File tree

8 files changed

+74
-25
lines changed

8 files changed

+74
-25
lines changed

ci/azure/install.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,9 @@ steps:
1616
--pre \
1717
--upgrade \
1818
matplotlib \
19+
numpy \
1920
pandas \
2021
scipy
21-
# numpy \ # FIXME https://github.com/pydata/xarray/issues/3409
2222
pip install \
2323
--no-deps \
2424
--upgrade \

ci/requirements/py36.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ dependencies:
2525
- nc-time-axis
2626
- netcdf4
2727
- numba
28-
- numpy<1.18 # FIXME https://github.com/pydata/xarray/issues/3409
28+
- numpy
2929
- pandas
3030
- pint
3131
- pip

ci/requirements/py37.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ dependencies:
2525
- nc-time-axis
2626
- netcdf4
2727
- numba
28-
- numpy<1.18 # FIXME https://github.com/pydata/xarray/issues/3409
28+
- numpy
2929
- pandas
3030
- pint
3131
- pip

doc/whats-new.rst

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,12 @@ Bug fixes
115115
(:issue:`3402`). By `Deepak Cherian <https://github.com/dcherian/>`_
116116
- Allow appending datetime and bool data variables to zarr stores.
117117
(:issue:`3480`). By `Akihiro Matsukawa <https://github.com/amatsukawa/>`_.
118+
- Add support for numpy >=1.18; fix mean() on datetime64 arrays on the dask backend
119+
(:issue:`3409`, :pull:`3537`). By `Guido Imperiale <https://github.com/crusaderky>`_.
120+
- Add support for pandas >=0.26 (:issue:`3440`).
121+
By `Deepak Cherian <https://github.com/dcherian>`_.
122+
- Add support for pseudonetcdf >=3.1 (:pull:`3485`).
123+
By `Barron Henderson <https://github.com/barronh>`_.
118124

119125
Documentation
120126
~~~~~~~~~~~~~
@@ -133,7 +139,6 @@ Documentation
133139

134140
Internal Changes
135141
~~~~~~~~~~~~~~~~
136-
137142
- Added integration tests against `pint <https://pint.readthedocs.io/>`_.
138143
(:pull:`3238`, :pull:`3447`, :pull:`3493`, :pull:`3508`)
139144
by `Justus Magin <https://github.com/keewis>`_.

xarray/core/dataset.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5316,7 +5316,9 @@ def _integrate_one(self, coord, datetime_unit=None):
53165316
datetime_unit, _ = np.datetime_data(coord_var.dtype)
53175317
elif datetime_unit is None:
53185318
datetime_unit = "s" # Default to seconds for cftime objects
5319-
coord_var = datetime_to_numeric(coord_var, datetime_unit=datetime_unit)
5319+
coord_var = coord_var._replace(
5320+
data=datetime_to_numeric(coord_var.data, datetime_unit=datetime_unit)
5321+
)
53205322

53215323
variables = {}
53225324
coord_names = set()

xarray/core/duck_array_ops.py

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -351,6 +351,26 @@ def f(values, axis=None, skipna=None, **kwargs):
351351
_mean = _create_nan_agg_method("mean")
352352

353353

354+
def _datetime_nanmin(array):
355+
"""nanmin() function for datetime64.
356+
357+
Caveats that this function deals with:
358+
359+
- In numpy < 1.18, min() on datetime64 incorrectly ignores NaT
360+
- numpy nanmin() doesn't work on datetime64 (all versions at the moment of writing)
361+
- dask min() does not work on datetime64 (all versions at the moment of writing)
362+
"""
363+
assert array.dtype.kind in "mM"
364+
dtype = array.dtype
365+
# (NaT).astype(float) does not produce NaN...
366+
array = where(pandas_isnull(array), np.nan, array.astype(float))
367+
array = min(array, skipna=True)
368+
if isinstance(array, float):
369+
array = np.array(array)
370+
# ...but (NaN).astype("M8") does produce NaT
371+
return array.astype(dtype)
372+
373+
354374
def datetime_to_numeric(array, offset=None, datetime_unit=None, dtype=float):
355375
"""Convert an array containing datetime-like data to an array of floats.
356376
@@ -370,7 +390,10 @@ def datetime_to_numeric(array, offset=None, datetime_unit=None, dtype=float):
370390
"""
371391
# TODO: make this function dask-compatible?
372392
if offset is None:
373-
offset = array.min()
393+
if array.dtype.kind in "Mm":
394+
offset = _datetime_nanmin(array)
395+
else:
396+
offset = min(array)
374397
array = array - offset
375398

376399
if not hasattr(array, "dtype"): # scalar is converted to 0d-array
@@ -401,7 +424,8 @@ def mean(array, axis=None, skipna=None, **kwargs):
401424

402425
array = asarray(array)
403426
if array.dtype.kind in "Mm":
404-
offset = min(array)
427+
offset = _datetime_nanmin(array)
428+
405429
# xarray always uses np.datetime64[ns] for np.datetime64 data
406430
dtype = "timedelta64[ns]"
407431
return (

xarray/tests/test_dataset.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5874,7 +5874,9 @@ def test_trapz_datetime(dask, which_datetime):
58745874

58755875
actual = da.integrate("time", datetime_unit="D")
58765876
expected_data = np.trapz(
5877-
da, duck_array_ops.datetime_to_numeric(da["time"], datetime_unit="D"), axis=0
5877+
da.data,
5878+
duck_array_ops.datetime_to_numeric(da["time"].data, datetime_unit="D"),
5879+
axis=0,
58785880
)
58795881
expected = xr.DataArray(
58805882
expected_data,

xarray/tests/test_duck_array_ops.py

Lines changed: 33 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -274,23 +274,39 @@ def assert_dask_array(da, dask):
274274

275275

276276
@arm_xfail
277-
@pytest.mark.parametrize("dask", [False, True])
278-
def test_datetime_reduce(dask):
279-
time = np.array(pd.date_range("15/12/1999", periods=11))
280-
time[8:11] = np.nan
281-
da = DataArray(np.linspace(0, 365, num=11), dims="time", coords={"time": time})
282-
283-
if dask and has_dask:
284-
chunks = {"time": 5}
285-
da = da.chunk(chunks)
286-
287-
actual = da["time"].mean()
288-
assert not pd.isnull(actual)
289-
actual = da["time"].mean(skipna=False)
290-
assert pd.isnull(actual)
291-
292-
# test for a 0d array
293-
assert da["time"][0].mean() == da["time"][:1].mean()
277+
@pytest.mark.parametrize("dask", [False, True] if has_dask else [False])
278+
def test_datetime_mean(dask):
279+
# Note: only testing numpy, as dask is broken upstream
280+
da = DataArray(
281+
np.array(["2010-01-01", "NaT", "2010-01-03", "NaT", "NaT"], dtype="M8"),
282+
dims=["time"],
283+
)
284+
if dask:
285+
# Trigger use case where a chunk is full of NaT
286+
da = da.chunk({"time": 3})
287+
288+
expect = DataArray(np.array("2010-01-02", dtype="M8"))
289+
expect_nat = DataArray(np.array("NaT", dtype="M8"))
290+
291+
actual = da.mean()
292+
if dask:
293+
assert actual.chunks is not None
294+
assert_equal(actual, expect)
295+
296+
actual = da.mean(skipna=False)
297+
if dask:
298+
assert actual.chunks is not None
299+
assert_equal(actual, expect_nat)
300+
301+
# tests for 1d array full of NaT
302+
assert_equal(da[[1]].mean(), expect_nat)
303+
assert_equal(da[[1]].mean(skipna=False), expect_nat)
304+
305+
# tests for a 0d array
306+
assert_equal(da[0].mean(), da[0])
307+
assert_equal(da[0].mean(skipna=False), da[0])
308+
assert_equal(da[1].mean(), expect_nat)
309+
assert_equal(da[1].mean(skipna=False), expect_nat)
294310

295311

296312
@requires_cftime

0 commit comments

Comments
 (0)