Skip to content

Commit 46169ab

Browse files
committed
Support _FillValue or missing_value encoding
1 parent f269e68 commit 46169ab

File tree

2 files changed

+52
-18
lines changed

2 files changed

+52
-18
lines changed

xarray/coding/times.py

Lines changed: 12 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -94,8 +94,6 @@
9494

9595

9696
_INVALID_LITERAL_TIMEDELTA64_ENCODING_KEYS = [
97-
"_FillValue",
98-
"missing_value",
9997
"add_offset",
10098
"scale_factor",
10199
]
@@ -1465,17 +1463,17 @@ def encode(self, variable: Variable, name: T_Name = None) -> Variable:
14651463
k in encoding for k in _INVALID_LITERAL_TIMEDELTA64_ENCODING_KEYS
14661464
):
14671465
raise ValueError(
1468-
f"Specifying '_FillValue', 'missing_value', "
1469-
f"'add_offset', or 'scale_factor' is not supported "
1470-
f"when literally encoding the np.timedelta64 values "
1471-
f"of variable {name!r}. To encode {name!r} with such "
1472-
f"encoding parameters, additionally set "
1473-
f"encoding['units'] to a unit of time, e.g. "
1474-
f"'seconds'. To proceed with literal np.timedelta64 "
1475-
f"encoding of {name!r}, remove any encoding entries "
1476-
f"for '_FillValue', 'missing_value', 'add_offset', "
1477-
f"or 'scale_factor'."
1466+
f"Specifying 'add_offset' or 'scale_factor' is not "
1467+
f"supported when literally encoding the "
1468+
f"np.timedelta64 values of variable {name!r}. To "
1469+
f"encode {name!r} with such encoding parameters, "
1470+
f"additionally set encoding['units'] to a unit of "
1471+
f"time, e.g. 'seconds'. To proceed with literal "
1472+
f"np.timedelta64 encoding of {name!r}, remove any "
1473+
f"encoding entries for 'add_offset' or 'scale_factor'."
14781474
)
1475+
if "_FillValue" not in encoding and "missing_value" not in encoding:
1476+
encoding["_FillValue"] = np.iinfo(np.int64).min
14791477

14801478
data, units = encode_cf_timedelta(data, units, dtype)
14811479
safe_setitem(attrs, "units", units, name=name)
@@ -1500,9 +1498,8 @@ def decode(self, variable: Variable, name: T_Name = None) -> Variable:
15001498
):
15011499
raise ValueError(
15021500
"Decoding np.timedelta64 values via dtype is not "
1503-
"supported when '_FillValue', 'missing_value', "
1504-
"'add_offset', or 'scale_factor' are present in "
1505-
"encoding."
1501+
"supported when 'add_offset', or 'scale_factor' are "
1502+
"present in encoding."
15061503
)
15071504
dtype = pop_to(attrs, encoding, "dtype", name=name)
15081505
dtype = np.dtype(dtype)

xarray/tests/test_coding_times.py

Lines changed: 40 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1964,14 +1964,15 @@ def test_decode_floating_point_timedelta_no_serialization_warning() -> None:
19641964

19651965

19661966
def test_literal_timedelta64_coding(time_unit: PDDatetimeUnitOptions) -> None:
1967-
timedeltas = pd.timedelta_range(0, freq="D", periods=3, unit=time_unit) # type: ignore[call-arg]
1967+
timedeltas = np.array([0, 1, "NaT"], dtype=f"timedelta64[{time_unit}]")
19681968
variable = Variable(["time"], timedeltas)
19691969
expected_dtype = f"timedelta64[{time_unit}]"
19701970
expected_units = _numpy_to_netcdf_timeunit(time_unit)
19711971

19721972
encoded = conventions.encode_cf_variable(variable)
19731973
assert encoded.attrs["dtype"] == expected_dtype
19741974
assert encoded.attrs["units"] == expected_units
1975+
assert encoded.attrs["_FillValue"] == np.iinfo(np.int64).min
19751976

19761977
decoded = conventions.decode_cf_variable("timedeltas", encoded)
19771978
assert decoded.encoding["dtype"] == expected_dtype
@@ -2011,7 +2012,7 @@ def test_literal_timedelta_encode_invalid_attribute(attribute) -> None:
20112012

20122013

20132014
@pytest.mark.parametrize("invalid_key", _INVALID_LITERAL_TIMEDELTA64_ENCODING_KEYS)
2014-
def test_literal_timedelta_encoding_mask_and_scale_error(invalid_key) -> None:
2015+
def test_literal_timedelta_encoding_invalid_key_error(invalid_key) -> None:
20152016
encoding = {invalid_key: 1.0}
20162017
timedeltas = pd.timedelta_range(0, freq="D", periods=3)
20172018
variable = Variable(["time"], timedeltas, encoding=encoding)
@@ -2020,7 +2021,7 @@ def test_literal_timedelta_encoding_mask_and_scale_error(invalid_key) -> None:
20202021

20212022

20222023
@pytest.mark.parametrize("invalid_key", _INVALID_LITERAL_TIMEDELTA64_ENCODING_KEYS)
2023-
def test_literal_timedelta_decoding_mask_and_scale_error(invalid_key) -> None:
2024+
def test_literal_timedelta_decoding_invalid_key_error(invalid_key) -> None:
20242025
attrs = {invalid_key: 1.0, "dtype": "timedelta64[s]", "units": "seconds"}
20252026
variable = Variable(["time"], [0, 1, 2], attrs=attrs)
20262027
with pytest.raises(ValueError, match=invalid_key):
@@ -2044,6 +2045,12 @@ def test_literal_timedelta_decoding_mask_and_scale_error(invalid_key) -> None:
20442045
def test_timedelta_decoding_options(
20452046
decode_via_units, decode_via_dtype, attrs, expect_timedelta64
20462047
) -> None:
2048+
# Note that with literal timedelta encoding, we always add a _FillValue, even
2049+
# if one is not present in the original encoding parameters, which is why
2050+
# we ensure one is defined here when "dtype" is present in attrs.
2051+
if "dtype" in attrs:
2052+
attrs["_FillValue"] = np.iinfo(np.int64).min
2053+
20472054
array = np.array([0, 1, 2], dtype=np.dtype("int64"))
20482055
encoded = Variable(["time"], array, attrs=attrs)
20492056

@@ -2083,3 +2090,33 @@ def test_timedelta_encoding_explicit_non_timedelta64_dtype() -> None:
20832090
assert_identical(reencoded, encoded)
20842091
assert encoded.attrs["units"] == "days"
20852092
assert encoded.dtype == np.dtype("int32")
2093+
2094+
2095+
@pytest.mark.parametrize("mask_attribute", ["_FillValue", "missing_value"])
2096+
def test_literal_timedelta64_coding_with_mask(
2097+
time_unit: PDDatetimeUnitOptions, mask_attribute: str
2098+
) -> None:
2099+
timedeltas = np.array([0, 1, "NaT"], dtype=f"timedelta64[{time_unit}]")
2100+
mask = 10
2101+
variable = Variable(["time"], timedeltas, encoding={mask_attribute: mask})
2102+
expected_dtype = f"timedelta64[{time_unit}]"
2103+
expected_units = _numpy_to_netcdf_timeunit(time_unit)
2104+
2105+
encoded = conventions.encode_cf_variable(variable)
2106+
assert encoded.attrs["dtype"] == expected_dtype
2107+
assert encoded.attrs["units"] == expected_units
2108+
assert encoded.attrs[mask_attribute] == mask
2109+
assert encoded[-1] == mask
2110+
2111+
decoded = conventions.decode_cf_variable("timedeltas", encoded)
2112+
assert decoded.encoding["dtype"] == expected_dtype
2113+
assert decoded.encoding["units"] == expected_units
2114+
assert decoded.encoding[mask_attribute] == mask
2115+
assert np.isnat(decoded[-1])
2116+
2117+
assert_identical(decoded, variable)
2118+
assert decoded.dtype == variable.dtype
2119+
2120+
reencoded = conventions.encode_cf_variable(decoded)
2121+
assert_identical(reencoded, encoded)
2122+
assert reencoded.dtype == encoded.dtype

0 commit comments

Comments
 (0)