-
-
Notifications
You must be signed in to change notification settings - Fork 1.1k
Don't set encoding attributes on bounds variables. #2965
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
ff2fd49
302ab63
931f973
5526fe4
3889ba6
6f2bc05
b903e89
70c8c5c
d637e9e
2f1dd25
12f3e55
f8789e7
e0c49a4
674e5a5
34d0e60
f187ca1
b1dcf1d
c63cf33
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,6 +7,7 @@ | |
from .coding import strings, times, variables | ||
from .coding.variables import SerializationWarning | ||
from .core import duck_array_ops, indexing | ||
from .core.common import contains_cftime_datetimes | ||
from .core.pycompat import dask_array_type | ||
from .core.variable import IndexVariable, Variable, as_variable | ||
|
||
|
@@ -355,6 +356,51 @@ def _update_bounds_attributes(variables): | |
bounds_attrs.setdefault('calendar', attrs['calendar']) | ||
|
||
|
||
def _update_bounds_encoding(variables):
    """Adds time encoding to time bounds variables.

    Variables handling time bounds ("Cell boundaries" in the CF
    conventions) do not necessarily carry the necessary attributes to be
    decoded. This copies the encoding from the time variable to the
    associated bounds variable so that we write CF-compliant files.

    Parameters
    ----------
    variables : dict-like
        Mapping from variable name to variable object. Each variable is
        read through its ``attrs``, ``encoding`` and ``dtype`` attributes.
        The ``encoding`` dict of every referenced bounds variable is
        updated in place; values already present there are kept.

    Warns
    -----
    UserWarning
        If a datetime-like variable refers to a bounds variable present in
        ``variables`` but has no date units (a ``'units'`` encoding
        containing ``'since'``), so the two encodings cannot be tied
        together here.

    See Also:

    http://cfconventions.org/Data/cf-conventions/cf-conventions-1.7/
         cf-conventions.html#cell-boundaries

    https://github.com/pydata/xarray/issues/2565
    """

    # Iterate over (name, variable) pairs: the mapping key is the only
    # reliable source for the variable's name when building the warning.
    for name, v in variables.items():
        attrs = v.attrs

        # Only variables that point at a bounds variable we actually hold
        # are of interest; skip everything else before doing any dtype
        # inspection.
        if 'bounds' not in attrs or attrs['bounds'] not in variables:
            continue

        encoding = v.encoding
        has_date_units = 'units' in encoding and 'since' in encoding['units']

        if has_date_units:
            # setdefault: never overwrite an encoding the user set
            # explicitly on the bounds variable.
            bounds_encoding = variables[attrs['bounds']].encoding
            bounds_encoding.setdefault('units', encoding['units'])
            if 'calendar' in encoding:
                bounds_encoding.setdefault('calendar',
                                           encoding['calendar'])
        elif (np.issubdtype(v.dtype, np.datetime64) or
              contains_cftime_datetimes(v)):
            warnings.warn("Variable '{0}' has datetime type and a "
                          "bounds variable but {0}.encoding does not have "
                          "units specified. The units encodings for '{0}' "
                          "and '{1}' will be determined independently "
                          "and may not be equal, counter to CF-conventions. "
                          "If this is a concern, specify a units encoding for "
                          "'{0}' before writing to a file."
                          .format(name, attrs['bounds']),
                          UserWarning)
|
||
|
||
def decode_cf_variables(variables, attributes, concat_characters=True, | ||
mask_and_scale=True, decode_times=True, | ||
decode_coords=True, drop_variables=None, | ||
|
@@ -492,8 +538,6 @@ def cf_decoder(variables, attributes, | |
""" | ||
Decode a set of CF encoded variables and attributes. | ||
|
||
See Also, decode_cf_variable | ||
|
||
Parameters | ||
---------- | ||
variables : dict | ||
|
@@ -515,6 +559,10 @@ def cf_decoder(variables, attributes, | |
A dictionary mapping from variable name to xarray.Variable objects. | ||
decoded_attributes : dict | ||
A dictionary mapping from attribute name to values. | ||
|
||
See also | ||
-------- | ||
decode_cf_variable | ||
""" | ||
variables, attributes, _ = decode_cf_variables( | ||
variables, attributes, concat_characters, mask_and_scale, decode_times) | ||
|
@@ -595,14 +643,12 @@ def encode_dataset_coordinates(dataset): | |
|
||
def cf_encoder(variables, attributes): | ||
""" | ||
A function which takes a dicts of variables and attributes | ||
and encodes them to conform to CF conventions as much | ||
as possible. This includes masking, scaling, character | ||
array handling, and CF-time encoding. | ||
|
||
Decode a set of CF encoded variables and attributes. | ||
Encode a set of CF encoded variables and attributes. | ||
Takes a dicts of variables and attributes and encodes them | ||
to conform to CF conventions as much as possible. | ||
This includes masking, scaling, character array handling, | ||
and CF-time encoding. | ||
|
||
See Also, decode_cf_variable | ||
|
||
Parameters | ||
---------- | ||
|
@@ -618,8 +664,27 @@ def cf_encoder(variables, attributes): | |
encoded_attributes : dict | ||
A dictionary mapping from attribute name to value | ||
|
||
See also: encode_cf_variable | ||
See also | ||
-------- | ||
decode_cf_variable, encode_cf_variable | ||
""" | ||
|
||
# add encoding for time bounds variables if present. | ||
_update_bounds_encoding(variables) | ||
|
||
new_vars = OrderedDict((k, encode_cf_variable(v, name=k)) | ||
for k, v in variables.items()) | ||
|
||
# Remove attrs from bounds variables (issue #2921) | ||
for var in new_vars.values(): | ||
bounds = var.attrs['bounds'] if 'bounds' in var.attrs else None | ||
if bounds and bounds in new_vars: | ||
# see http://cfconventions.org/cf-conventions/cf-conventions.html#cell-boundaries # noqa | ||
for attr in ['units', 'standard_name', 'axis', 'positive', | ||
'calendar', 'long_name', 'leap_month', 'leap_year', | ||
'month_lengths']: | ||
if attr in new_vars[bounds].attrs and attr in var.attrs: | ||
if new_vars[bounds].attrs[attr] == var.attrs[attr]: | ||
new_vars[bounds].attrs.pop(attr) | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Could we issue a warning here? else:
warnings.warn("The attribute 'units' is not the same in the variable "
"'time' and its associated bounds 'time_bnds',"
" which is not cf-compliant.") or some such. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Do we need to? xarray allows writing CF-non-compliant files anyway... There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think the warning you added is the perfect approach. It will still be issued in the case of @mathause's example, but will still allow a user to write a non-CF-compliant file without a warning if the encoding attributes do not need to be computed on the fly. |
||
return new_vars, attributes |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
A general question -- would we consider 'bounds' to be an encoding parameter (like 'units' or 'calendar')? In other words, should we expect it to be in the encoding dictionary or the attrs dictionary at this stage? I feel like it may be more intuitive as part of encoding, but currently I know that we don't treat it that way when decoding files.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In my mental model, encoding attributes are those that control on-disk representation of the data. bounds counts as an attr to my mind since it's an attribute that links the variable to another variable.
A definition or list of what goes in encoding and what goes in attrs would make a good addition to the docs.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this is fair; I guess I was going off of the mental model of encoding parameters defined as "attributes that are potentially required for decoding all the variables in a file," in which case 'bounds' could qualify. I think your definition is probably cleaner, because it requires that encoding parameters control how the variable they are attached to is represented on disk (as opposed to another variable).