From 206c726af7b19c423a54793e5ab4e70f2cec3844 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Mon, 16 Sep 2024 20:32:11 -0600 Subject: [PATCH 01/11] Fix fill_value handling for complex & datetime dtypes --- src/zarr/core/metadata/v3.py | 2 +- src/zarr/testing/strategies.py | 20 +++++--------------- 2 files changed, 6 insertions(+), 16 deletions(-) diff --git a/src/zarr/core/metadata/v3.py b/src/zarr/core/metadata/v3.py index 603cd343af..c4b47409e7 100644 --- a/src/zarr/core/metadata/v3.py +++ b/src/zarr/core/metadata/v3.py @@ -360,7 +360,7 @@ def parse_fill_value( if fill_value is None: return dtype.type(0) if isinstance(fill_value, Sequence) and not isinstance(fill_value, str): - if dtype in (np.complex64, np.complex128): + if dtype.type in (np.complex64, np.complex128): dtype = cast(COMPLEX_DTYPE, dtype) if len(fill_value) == 2: # complex datatypes serialize to JSON arrays with two elements diff --git a/src/zarr/testing/strategies.py b/src/zarr/testing/strategies.py index 3f9d1264d9..4e7fb7ac2e 100644 --- a/src/zarr/testing/strategies.py +++ b/src/zarr/testing/strategies.py @@ -1,4 +1,3 @@ -import re from typing import Any import hypothesis.extra.numpy as npst @@ -42,7 +41,7 @@ ) stores = st.builds(MemoryStore, st.just({}), mode=st.just("w")) compressors = st.sampled_from([None, "default"]) -format = st.sampled_from([2, 3]) +zarr_formats = st.sampled_from([2, 3]) @st.composite # type: ignore[misc] @@ -72,14 +71,15 @@ def arrays( paths: st.SearchStrategy[None | str] = paths, array_names: st.SearchStrategy = array_names, attrs: st.SearchStrategy = attrs, - format: st.SearchStrategy = format, + zarr_formats: st.SearchStrategy = zarr_formats, ) -> Array: store = draw(stores) nparray, chunks = draw(np_array_and_chunks(arrays=arrays)) path = draw(paths) name = draw(array_names) attributes = draw(attrs) - zarr_format = draw(format) + zarr_format = draw(zarr_formats) + fill_value = draw(npst.from_dtype(nparray.dtype)) # compressor = draw(compressors) # TODO: clean this up @@ -105,16 +105,6 @@ def arrays( array_path = path + ("/" if not path.endswith("/") else "") + name root = Group.from_store(store, zarr_format=zarr_format) - fill_value_args: tuple[Any, ...] = tuple() - if nparray.dtype.kind == "M": - m = re.search(r"\[(.+)\]", nparray.dtype.str) - if not m: - raise ValueError(f"Couldn't find precision for dtype '{nparray.dtype}.") - - fill_value_args = ( - # e.g. ns, D - m.groups()[0], - ) a = root.create_array( array_path, @@ -123,7 +113,7 @@ def arrays( dtype=nparray.dtype.str, attributes=attributes, # compressor=compressor, # TODO: FIXME - fill_value=nparray.dtype.type(0, *fill_value_args), + fill_value=fill_value, ) assert isinstance(a, Array) From 5d6d8d60b21eb21b3a02aaecd21fa99bf6644f02 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Tue, 17 Sep 2024 14:06:10 -0600 Subject: [PATCH 02/11] cleanup --- src/zarr/testing/strategies.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/zarr/testing/strategies.py b/src/zarr/testing/strategies.py index 4e7fb7ac2e..36f7e41e47 100644 --- a/src/zarr/testing/strategies.py +++ b/src/zarr/testing/strategies.py @@ -120,9 +120,7 @@ def arrays( assert nparray.shape == a.shape assert chunks == a.chunks assert array_path == a.path, (path, name, array_path, a.name, a.path) - # assert array_path == a.name, (path, name, array_path, a.name, a.path) - # assert a.basename is None # TODO - # assert a.store == normalize_store_arg(store) + assert a.basename == name, (a.basename, name) assert dict(a.attrs) == expected_attrs a[:] = nparray From 4857a67c396e33a5f3d022e4cf653a8f0cb7eb86 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Tue, 17 Sep 2024 14:26:16 -0600 Subject: [PATCH 03/11] more cleanup --- src/zarr/testing/strategies.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/zarr/testing/strategies.py b/src/zarr/testing/strategies.py index 36f7e41e47..63a11435fc 100644 --- a/src/zarr/testing/strategies.py +++ b/src/zarr/testing/strategies.py @@ -110,13 +110,14 @@ def arrays( array_path, shape=nparray.shape, chunks=chunks, - dtype=nparray.dtype.str, + dtype=nparray.dtype, attributes=attributes, # compressor=compressor, # TODO: FIXME fill_value=fill_value, ) assert isinstance(a, Array) + assert isinstance(root[array_path], Array) assert nparray.shape == a.shape assert chunks == a.chunks assert array_path == a.path, (path, name, array_path, a.name, a.path) From ddcb61f3bf75e582e1358351dc8c568244b11658 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Tue, 17 Sep 2024 14:27:20 -0600 Subject: [PATCH 04/11] more cleanup --- src/zarr/testing/strategies.py | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/src/zarr/testing/strategies.py b/src/zarr/testing/strategies.py index 63a11435fc..5988d24901 100644 --- a/src/zarr/testing/strategies.py +++ b/src/zarr/testing/strategies.py @@ -82,25 +82,6 @@ def arrays( fill_value = draw(npst.from_dtype(nparray.dtype)) # compressor = draw(compressors) - # TODO: clean this up - # if path is None and name is None: - # array_path = None - # array_name = None - # elif path is None and name is not None: - # array_path = f"{name}" - # array_name = f"/{name}" - # elif path is not None and name is None: - # array_path = path - # array_name = None - # elif path == "/": - # assert name is not None - # array_path = name - # array_name = "/" + name - # else: - # assert name is not None - # array_path = f"{path}/{name}" - # array_name = "/" + array_path - expected_attrs = {} if attributes is None else attributes array_path = path + ("/" if not path.endswith("/") else "") + name From efcf1f59d31535d3b7aecd6b437604c4747c6213 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Tue, 17 Sep 2024 14:43:46 -0600 Subject: [PATCH 05/11] Fix default fill_value --- src/zarr/core/array.py | 7 ------- src/zarr/core/buffer/core.py | 3 +++ src/zarr/testing/strategies.py | 6 ++++-- 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index b825ca4ca1..a312303885 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -252,12 +252,6 @@ async def _create_v3( shape = parse_shapelike(shape) codecs = list(codecs) if codecs is not None else [BytesCodec()] - if fill_value is None: - if dtype == np.dtype("bool"): - fill_value = False - else: - fill_value = 0 - if chunk_key_encoding is None: chunk_key_encoding = ("default", "/") assert chunk_key_encoding is not None @@ -281,7 +275,6 @@ async def _create_v3( ) array = cls(metadata=metadata, store_path=store_path) - await array._save_metadata(metadata) return array diff --git a/src/zarr/core/buffer/core.py b/src/zarr/core/buffer/core.py index ba629befa1..58a8c0b7c3 100644 --- a/src/zarr/core/buffer/core.py +++ b/src/zarr/core/buffer/core.py @@ -464,6 +464,9 @@ def __repr__(self) -> str: def all_equal(self, other: Any, equal_nan: bool = True) -> bool: """Compare to `other` using np.array_equal.""" + if other is None: + # Handle None fill_value for Zarr V2 + return False # use array_equal to obtain equal_nan=True functionality data, other = np.broadcast_arrays(self._data, other) result = np.array_equal(self._data, other, equal_nan=equal_nan) diff --git a/src/zarr/testing/strategies.py b/src/zarr/testing/strategies.py index 5988d24901..40b21814da 100644 --- a/src/zarr/testing/strategies.py +++ b/src/zarr/testing/strategies.py @@ -79,7 +79,8 @@ def arrays( name = draw(array_names) attributes = draw(attrs) zarr_format = draw(zarr_formats) - fill_value = draw(npst.from_dtype(nparray.dtype)) + # test that None works too. + fill_value = draw(st.one_of([st.none(), npst.from_dtype(nparray.dtype)])) # compressor = draw(compressors) expected_attrs = {} if attributes is None else attributes @@ -93,11 +94,12 @@ def arrays( chunks=chunks, dtype=nparray.dtype, attributes=attributes, - # compressor=compressor, # TODO: FIXME + # compressor=compressor, # FIXME fill_value=fill_value, ) assert isinstance(a, Array) + assert a.fill_value is not None assert isinstance(root[array_path], Array) assert nparray.shape == a.shape assert chunks == a.chunks From e31cb6f4b15acd8c12ad6c9c85fec5d7a8dfe5eb Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Mon, 23 Sep 2024 21:51:22 -0600 Subject: [PATCH 06/11] Fixes --- src/zarr/core/metadata/v3.py | 2 +- src/zarr/testing/strategies.py | 18 ++++++++++++++---- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/src/zarr/core/metadata/v3.py b/src/zarr/core/metadata/v3.py index c4b47409e7..112a4eb64d 100644 --- a/src/zarr/core/metadata/v3.py +++ b/src/zarr/core/metadata/v3.py @@ -391,7 +391,7 @@ def parse_fill_value( pass elif fill_value in ["Infinity", "-Infinity"] and not np.isfinite(casted_value): pass - elif dtype.kind == "f": + elif dtype.kind in "cf": # float comparison is not exact, especially when dtype st.SearchStrategy[np.dtype]: + return ( + npst.integer_dtypes(endianness="=") + | npst.unsigned_integer_dtypes(endianness="=") + | npst.floating_dtypes(endianness="=") + | npst.complex_number_dtypes(endianness="=") + # | npst.unicode_string_dtypes() + # | npst.datetime64_dtypes() + # | npst.timedelta64_dtypes() + ) + + # From https://zarr-specs.readthedocs.io/en/latest/v3/core/v3.0.html#node-names # 1. must not be the empty string ("") # 2. must not include the character "/" @@ -33,10 +46,7 @@ attrs = st.none() | st.dictionaries(_attr_keys, _attr_values) paths = st.lists(node_names, min_size=1).map(lambda x: "/".join(x)) | st.just("/") np_arrays = npst.arrays( - # TODO: re-enable timedeltas once they are supported - dtype=npst.scalar_dtypes().filter( - lambda x: (x.kind not in ["m", "M"]) and (x.byteorder not in [">"]) - ), + dtype=dtypes(), shape=npst.array_shapes(max_dims=4), ) stores = st.builds(MemoryStore, st.just({}), mode=st.just("w")) From 6db822534cea9061dc75a6d03862fe13ccae2d1d Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Mon, 23 Sep 2024 21:56:30 -0600 Subject: [PATCH 07/11] Add booleans --- src/zarr/testing/strategies.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/zarr/testing/strategies.py b/src/zarr/testing/strategies.py index a24ac695fc..c27bc76b1d 100644 --- a/src/zarr/testing/strategies.py +++ b/src/zarr/testing/strategies.py @@ -21,7 +21,8 @@ def dtypes() -> st.SearchStrategy[np.dtype]: return ( - npst.integer_dtypes(endianness="=") + npst.boolean_dtypes() + | npst.integer_dtypes(endianness="=") | npst.unsigned_integer_dtypes(endianness="=") | npst.floating_dtypes(endianness="=") | npst.complex_number_dtypes(endianness="=") From 1688c862df986d45349db5967cad8e131ea6fe97 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Mon, 23 Sep 2024 22:44:50 -0600 Subject: [PATCH 08/11] Add v2, v3 specific dtypes --- src/zarr/core/buffer/core.py | 4 ++- src/zarr/testing/strategies.py | 49 +++++++++++++++++++++++++++------- tests/v3/test_properties.py | 11 ++++---- 3 files changed, 48 insertions(+), 16 deletions(-) diff --git a/src/zarr/core/buffer/core.py b/src/zarr/core/buffer/core.py index 58a8c0b7c3..9a35ae778e 100644 --- a/src/zarr/core/buffer/core.py +++ b/src/zarr/core/buffer/core.py @@ -469,7 +469,9 @@ def all_equal(self, other: Any, equal_nan: bool = True) -> bool: return False # use array_equal to obtain equal_nan=True functionality data, other = np.broadcast_arrays(self._data, other) - result = np.array_equal(self._data, other, equal_nan=equal_nan) + result = np.array_equal( + self._data, other, equal_nan=equal_nan if self._data.dtype.kind not in "US" else False + ) return result def fill(self, value: Any) -> None: diff --git a/src/zarr/testing/strategies.py b/src/zarr/testing/strategies.py index c27bc76b1d..9adc8ffb3c 100644 --- a/src/zarr/testing/strategies.py +++ b/src/zarr/testing/strategies.py @@ -1,4 +1,4 @@ -from typing import Any +from typing import Any, Literal import hypothesis.extra.numpy as npst import hypothesis.strategies as st @@ -19,19 +19,34 @@ ) -def dtypes() -> st.SearchStrategy[np.dtype]: +def v3_dtypes() -> st.SearchStrategy[np.dtype]: return ( npst.boolean_dtypes() | npst.integer_dtypes(endianness="=") | npst.unsigned_integer_dtypes(endianness="=") | npst.floating_dtypes(endianness="=") | npst.complex_number_dtypes(endianness="=") + # | npst.byte_string_dtypes(endianness="=") # | npst.unicode_string_dtypes() # | npst.datetime64_dtypes() # | npst.timedelta64_dtypes() ) +def v2_dtypes() -> st.SearchStrategy[np.dtype]: + return ( + npst.boolean_dtypes() + | npst.integer_dtypes(endianness="=") + | npst.unsigned_integer_dtypes(endianness="=") + | npst.floating_dtypes(endianness="=") + | npst.complex_number_dtypes(endianness="=") + | npst.byte_string_dtypes(endianness="=") + | npst.unicode_string_dtypes(endianness="=") + | npst.datetime64_dtypes() + # | npst.timedelta64_dtypes() + ) + + # From https://zarr-specs.readthedocs.io/en/latest/v3/core/v3.0.html#node-names # 1. must not be the empty string ("") # 2. must not include the character "/" @@ -46,18 +61,29 @@ def dtypes() -> st.SearchStrategy[np.dtype]: array_names = node_names attrs = st.none() | st.dictionaries(_attr_keys, _attr_values) paths = st.lists(node_names, min_size=1).map(lambda x: "/".join(x)) | st.just("/") -np_arrays = npst.arrays( - dtype=dtypes(), - shape=npst.array_shapes(max_dims=4), -) stores = st.builds(MemoryStore, st.just({}), mode=st.just("w")) compressors = st.sampled_from([None, "default"]) -zarr_formats = st.sampled_from([2, 3]) +zarr_formats: st.SearchStrategy[Literal[2, 3]] = st.sampled_from([2, 3]) +array_shapes = npst.array_shapes(max_dims=4) + + +@st.composite # type: ignore[misc] +def numpy_arrays( + draw: st.DrawFn, + *, + shapes: st.SearchStrategy[tuple[int, ...]] = array_shapes, + zarr_formats: st.SearchStrategy[Literal[2, 3]] = zarr_formats, +) -> Any: + """ + Generate numpy arrays that can be saved in the provided Zarr format. + """ + zarr_format = draw(zarr_formats) + return draw(npst.arrays(dtype=v3_dtypes() if zarr_format == 3 else v2_dtypes(), shape=shapes)) @st.composite # type: ignore[misc] def np_array_and_chunks( - draw: st.DrawFn, *, arrays: st.SearchStrategy[np.ndarray] = np_arrays + draw: st.DrawFn, *, arrays: st.SearchStrategy[np.ndarray] = numpy_arrays ) -> tuple[np.ndarray, tuple[int]]: # type: ignore[type-arg] """A hypothesis strategy to generate small sized random arrays. @@ -76,20 +102,23 @@ def np_array_and_chunks( def arrays( draw: st.DrawFn, *, + shapes: st.SearchStrategy[tuple[int, ...]] = array_shapes, compressors: st.SearchStrategy = compressors, stores: st.SearchStrategy[StoreLike] = stores, - arrays: st.SearchStrategy[np.ndarray] = np_arrays, paths: st.SearchStrategy[None | str] = paths, array_names: st.SearchStrategy = array_names, + arrays: st.SearchStrategy | None = None, attrs: st.SearchStrategy = attrs, zarr_formats: st.SearchStrategy = zarr_formats, ) -> Array: store = draw(stores) - nparray, chunks = draw(np_array_and_chunks(arrays=arrays)) path = draw(paths) name = draw(array_names) attributes = draw(attrs) zarr_format = draw(zarr_formats) + if arrays is None: + arrays = numpy_arrays(shapes=shapes, zarr_formats=st.just(zarr_format)) + nparray, chunks = draw(np_array_and_chunks(arrays=arrays)) # test that None works too. fill_value = draw(st.one_of([st.none(), npst.from_dtype(nparray.dtype)])) # compressor = draw(compressors) diff --git a/tests/v3/test_properties.py b/tests/v3/test_properties.py index a78e9207bd..93b3dc9f7e 100644 --- a/tests/v3/test_properties.py +++ b/tests/v3/test_properties.py @@ -7,13 +7,13 @@ import hypothesis.extra.numpy as npst # noqa import hypothesis.strategies as st # noqa from hypothesis import given, settings # noqa -from zarr.testing.strategies import arrays, np_arrays, basic_indices # noqa +from zarr.testing.strategies import arrays, numpy_arrays, basic_indices, zarr_formats # noqa -@given(st.data()) -def test_roundtrip(data: st.DataObject) -> None: - nparray = data.draw(np_arrays) - zarray = data.draw(arrays(arrays=st.just(nparray))) +@given(data=st.data(), zarr_format=zarr_formats) +def test_roundtrip(data: st.DataObject, zarr_format: int) -> None: + nparray = data.draw(numpy_arrays(zarr_formats=st.just(zarr_format))) + zarray = data.draw(arrays(arrays=st.just(nparray), zarr_formats=st.just(zarr_format))) assert_array_equal(nparray, zarray[:]) @@ -31,6 +31,7 @@ def test_basic_indexing(data: st.DataObject) -> None: assert_array_equal(nparray, zarray[:]) +@settings(report_multiple_bugs=False) @given(data=st.data()) def test_vindex(data: st.DataObject) -> None: zarray = data.draw(arrays()) From 2a973b09e2852d56d4d3d1fa608868bc8a4f24da Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Mon, 23 Sep 2024 22:45:09 -0600 Subject: [PATCH 09/11] Add version.py to gitignore --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index a09fb54d5c..199ab10578 100644 --- a/.gitignore +++ b/.gitignore @@ -84,3 +84,5 @@ fixture/ .DS_Store tests/.hypothesis .hypothesis/ + +zarr/version.py From a49bd326958c9c297a04e3c60130aaa4824b8986 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Mon, 23 Sep 2024 22:45:34 -0600 Subject: [PATCH 10/11] cleanpu --- tests/v3/test_properties.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/v3/test_properties.py b/tests/v3/test_properties.py index 93b3dc9f7e..ad444923e3 100644 --- a/tests/v3/test_properties.py +++ b/tests/v3/test_properties.py @@ -31,7 +31,6 @@ def test_basic_indexing(data: st.DataObject) -> None: assert_array_equal(nparray, zarray[:]) -@settings(report_multiple_bugs=False) @given(data=st.data()) def test_vindex(data: st.DataObject) -> None: zarray = data.draw(arrays()) From 1b9e81889751f06a55aead18398692124f473084 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 25 Sep 2024 16:31:10 +0000 Subject: [PATCH 11/11] style: pre-commit fixes --- tests/v3/test_properties.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/v3/test_properties.py b/tests/v3/test_properties.py index aa65477d79..380a4d851e 100644 --- a/tests/v3/test_properties.py +++ b/tests/v3/test_properties.py @@ -6,8 +6,9 @@ import hypothesis.extra.numpy as npst # noqa: E402 import hypothesis.strategies as st # noqa: E402 -from hypothesis import given, settings # noqa: E402 -from zarr.testing.strategies import arrays, numpy_arrays, basic_indices, zarr_formats # noqa: E402 +from hypothesis import given # noqa: E402 + +from zarr.testing.strategies import arrays, basic_indices, numpy_arrays, zarr_formats # noqa: E402 @given(data=st.data(), zarr_format=zarr_formats)