diff --git a/src/zarr/__init__.py b/src/zarr/__init__.py index fdab564c64..227b0cf63e 100644 --- a/src/zarr/__init__.py +++ b/src/zarr/__init__.py @@ -1,34 +1,68 @@ -from __future__ import annotations - -import zarr.codecs # noqa: F401 from zarr._version import version as __version__ +from zarr.api.synchronous import ( + array, + consolidate_metadata, + copy, + copy_all, + copy_store, + create, + empty, + empty_like, + full, + full_like, + group, + load, + ones, + ones_like, + open, + open_array, + open_consolidated, + open_group, + open_like, + save, + save_array, + save_group, + tree, + zeros, + zeros_like, +) from zarr.array import Array, AsyncArray -from zarr.config import config # noqa: F401 +from zarr.config import config from zarr.group import AsyncGroup, Group -from zarr.store import ( - StoreLike, - make_store_path, -) -from zarr.sync import sync as _sync # in case setuptools scm screw up and find version to be 0.0.0 assert not __version__.startswith("0.0.0") - -async def open_auto_async(store: StoreLike) -> AsyncArray | AsyncGroup: - store_path = make_store_path(store) - try: - return await AsyncArray.open(store_path) - except KeyError: - return await AsyncGroup.open(store_path) - - -def open_auto(store: StoreLike) -> Array | Group: - object = _sync( - open_auto_async(store), - ) - if isinstance(object, AsyncArray): - return Array(object) - if isinstance(object, AsyncGroup): - return Group(object) - raise TypeError(f"Unexpected object type. Got {type(object)}.") +__all__ = [ + "__version__", + "config", + "Array", + "AsyncArray", + "Group", + "AsyncGroup", + "tree", + "array", + "consolidate_metadata", + "copy", + "copy_all", + "copy_store", + "create", + "empty", + "empty_like", + "full", + "full_like", + "group", + "load", + "ones", + "ones_like", + "open", + "open_array", + "open_consolidated", + "open_group", + "open_like", + "save", + "save_array", + "save_group", + "zeros", + "zeros_like", +] diff --git a/src/zarr/api/__init__.py b/src/zarr/api/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py new file mode 100644 index 0000000000..52d07fb6fe --- /dev/null +++ b/src/zarr/api/asynchronous.py @@ -0,0 +1,929 @@ +from __future__ import annotations + +import asyncio +import warnings +from collections.abc import Iterable +from typing import Any, Literal, Union, cast + +import numpy as np +import numpy.typing as npt + +from zarr.abc.codec import Codec +from zarr.array import Array, AsyncArray +from zarr.buffer import NDArrayLike +from zarr.chunk_key_encodings import ChunkKeyEncoding +from zarr.common import JSON, ChunkCoords, MemoryOrder, OpenMode, ZarrFormat +from zarr.group import AsyncGroup +from zarr.metadata import ArrayV2Metadata, ArrayV3Metadata +from zarr.store import ( + StoreLike, + make_store_path, +) + +# TODO: this type could use some more thought, noqa to avoid "Variable "asynchronous.ArrayLike" is not valid as a type" +ArrayLike = Union[AsyncArray | Array | npt.NDArray[Any]] # noqa +PathLike = str + + +def _get_shape_chunks(a: ArrayLike | Any) -> tuple[ChunkCoords | None, ChunkCoords | None]: + """helper function to get the shape and chunks from an array-like object""" + shape = None + chunks = None + + if hasattr(a, "shape") and isinstance(a.shape, tuple): + shape = a.shape + + if hasattr(a, "chunks") and isinstance(a.chunks, tuple) and (len(a.chunks) == len(a.shape)): + chunks = a.chunks + + elif hasattr(a, "chunklen"): + # bcolz carray + chunks = (a.chunklen,) + a.shape[1:] + + 
+    return shape, chunks
+
+
+def _like_args(a: ArrayLike, kwargs: dict[str, Any]) -> dict[str, Any]:
+    """set default values for shape and chunks if they are not present in the array-like object"""
+
+    new = kwargs.copy()
+
+    shape, chunks = _get_shape_chunks(a)
+    if shape is not None:
+        new["shape"] = shape
+    if chunks is not None:
+        new["chunks"] = chunks
+
+    if hasattr(a, "dtype"):
+        new["dtype"] = a.dtype
+
+    if isinstance(a, AsyncArray):
+        new["order"] = a.order
+        if isinstance(a.metadata, ArrayV2Metadata):
+            new["compressor"] = a.metadata.compressor
+            new["filters"] = a.metadata.filters
+        elif isinstance(a.metadata, ArrayV3Metadata):
+            new["codecs"] = a.metadata.codecs
+        else:
+            raise ValueError(f"Unsupported zarr format: {a.metadata.zarr_format}")
+    else:
+        # TODO: set default values compressor/codecs
+        # to do this, we may need to evaluate if this is a v2 or v3 array
+        # new["compressor"] = "default"
+        pass
+
+    return new
+
+
+def _handle_zarr_version_or_format(
+    *, zarr_version: ZarrFormat | None, zarr_format: ZarrFormat | None
+) -> ZarrFormat | None:
+    """handle the deprecated zarr_version kwarg and return zarr_format"""
+    if zarr_format is not None and zarr_version is not None and zarr_format != zarr_version:
+        raise ValueError(
+            f"zarr_format {zarr_format} does not match zarr_version {zarr_version}, please only set one"
+        )
+    if zarr_version is not None:
+        warnings.warn(
+            "zarr_version is deprecated, use zarr_format", DeprecationWarning, stacklevel=2
+        )
+        return zarr_version
+    return zarr_format
+
+
+def _default_zarr_version() -> ZarrFormat:
+    """return the default zarr_version"""
+    # TODO: set default value from config
+    return 3
+
+
+async def consolidate_metadata(*args: Any, **kwargs: Any) -> AsyncGroup:
+    raise NotImplementedError
+
+
+async def copy(*args: Any, **kwargs: Any) -> tuple[int, int, int]:
+    raise NotImplementedError
+
+
+async def copy_all(*args: Any, **kwargs: Any) -> tuple[int, int, int]:
+    raise NotImplementedError
+
+
+async def copy_store(*args: Any, **kwargs: Any) -> tuple[int, int, int]:
+    raise NotImplementedError
+
+
+async def load(
+    *,
+    store: StoreLike,
+    path: str | None = None,
+    zarr_format: ZarrFormat | None = None,
+    zarr_version: ZarrFormat | None = None,
+) -> NDArrayLike | dict[str, NDArrayLike]:
+    """Load data from an array or group into memory.
+
+    Parameters
+    ----------
+    store : Store or string
+        Store or path to directory in file system or name of zip file.
+    path : str or None, optional
+        The path within the store from which to load.
+
+    Returns
+    -------
+    out
+        If the path contains an array, out will be a numpy array. If the path contains
+        a group, out will be a dict-like object where keys are array names and values
+        are numpy arrays.
+
+    See Also
+    --------
+    save, save_group
+
+    Notes
+    -----
+    If loading data from a group of arrays, data will not be immediately loaded into
+    memory. Rather, arrays will be loaded into memory as they are requested.
+ """ + zarr_format = _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format) + + obj = await open(store=store, path=path, zarr_format=zarr_format) + if isinstance(obj, AsyncArray): + return await obj.getitem(slice(None)) + else: + raise NotImplementedError("loading groups not yet supported") + + +async def open( + *, + store: StoreLike | None = None, + mode: OpenMode | None = None, # type and value changed + zarr_version: ZarrFormat | None = None, # deprecated + zarr_format: ZarrFormat | None = None, + path: str | None = None, + **kwargs: Any, # TODO: type kwargs as valid args to open_array +) -> AsyncArray | AsyncGroup: + """Convenience function to open a group or array using file-mode-like semantics. + + Parameters + ---------- + store : Store or string, optional + Store or path to directory in file system or name of zip file. + mode : {'r', 'r+', 'a', 'w', 'w-'}, optional + Persistence mode: 'r' means read only (must exist); 'r+' means + read/write (must exist); 'a' means read/write (create if doesn't + exist); 'w' means create (overwrite if exists); 'w-' means create + (fail if exists). + zarr_format : {2, 3, None}, optional + The zarr format to use when saving. + path : str or None, optional + The path within the store to open. + **kwargs + Additional parameters are passed through to :func:`zarr.creation.open_array` or + :func:`zarr.hierarchy.open_group`. + + Returns + ------- + z : array or group + Return type depends on what exists in the given store. + """ + zarr_format = _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format) + store_path = make_store_path(store, mode=mode) + + if path is not None: + store_path = store_path / path + + try: + return await open_array(store=store_path, zarr_format=zarr_format, **kwargs) + except KeyError: + return await open_group(store=store_path, zarr_format=zarr_format, **kwargs) + + +async def open_consolidated(*args: Any, **kwargs: Any) -> AsyncGroup: + raise NotImplementedError + + +async def save( + store: StoreLike, + *args: NDArrayLike, + zarr_version: ZarrFormat | None = None, # deprecated + zarr_format: ZarrFormat | None = None, + path: str | None = None, + **kwargs: Any, # TODO: type kwargs as valid args to save +) -> None: + """Convenience function to save an array or group of arrays to the local file system. + + Parameters + ---------- + store : Store or string + Store or path to directory in file system or name of zip file. + args : ndarray + NumPy arrays with data to save. + zarr_format : {2, 3, None}, optional + The zarr format to use when saving. + path : str or None, optional + The path within the group where the arrays will be saved. + kwargs + NumPy arrays with data to save. + """ + zarr_format = _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format) + + if len(args) == 0 and len(kwargs) == 0: + raise ValueError("at least one array must be provided") + if len(args) == 1 and len(kwargs) == 0: + await save_array(store, args[0], zarr_format=zarr_format, path=path) + else: + await save_group(store, *args, zarr_format=zarr_format, path=path, **kwargs) + + +async def save_array( + store: StoreLike, + arr: NDArrayLike, + *, + zarr_version: ZarrFormat | None = None, # deprecated + zarr_format: ZarrFormat | None = None, + path: str | None = None, + **kwargs: Any, # TODO: type kwargs as valid args to create +) -> None: + """Convenience function to save a NumPy array to the local file system, following a + similar API to the NumPy save() function. 
+
+    Parameters
+    ----------
+    store : Store or string
+        Store or path to directory in file system or name of zip file.
+    arr : ndarray
+        NumPy array with data to save.
+    zarr_format : {2, 3, None}, optional
+        The zarr format to use when saving.
+    path : str or None, optional
+        The path within the store where the array will be saved.
+    kwargs
+        Passed through to :func:`create`, e.g., compressor.
+    """
+    zarr_format = (
+        _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format)
+        or _default_zarr_version()
+    )
+
+    store_path = make_store_path(store, mode="w")
+    if path is not None:
+        store_path = store_path / path
+    new = await AsyncArray.create(
+        store_path,
+        zarr_format=zarr_format,
+        shape=arr.shape,
+        dtype=arr.dtype,
+        chunks=arr.shape,
+        **kwargs,
+    )
+    await new.setitem(slice(None), arr)
+
+
+async def save_group(
+    store: StoreLike,
+    *args: NDArrayLike,
+    zarr_version: ZarrFormat | None = None,  # deprecated
+    zarr_format: ZarrFormat | None = None,
+    path: str | None = None,
+    **kwargs: NDArrayLike,
+) -> None:
+    """Convenience function to save several NumPy arrays to the local file system, following a
+    similar API to the NumPy savez()/savez_compressed() functions.
+
+    Parameters
+    ----------
+    store : Store or string
+        Store or path to directory in file system or name of zip file.
+    args : ndarray
+        NumPy arrays with data to save.
+    zarr_format : {2, 3, None}, optional
+        The zarr format to use when saving.
+    path : str or None, optional
+        Path within the store where the group will be saved.
+    kwargs
+        NumPy arrays with data to save.
+    """
+    zarr_format = _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format)
+
+    if len(args) == 0 and len(kwargs) == 0:
+        raise ValueError("at least one array must be provided")
+    aws = []
+    for i, arr in enumerate(args):
+        _path = f"{path}/arr_{i}" if path is not None else f"arr_{i}"
+        aws.append(save_array(store, arr, zarr_format=zarr_format, path=_path))
+    for k, arr in kwargs.items():
+        _path = f"{path}/{k}" if path is not None else k
+        aws.append(save_array(store, arr, zarr_format=zarr_format, path=_path))
+    await asyncio.gather(*aws)
+
+
+async def tree(*args: Any, **kwargs: Any) -> None:
+    raise NotImplementedError
+
+
+async def array(data: NDArrayLike, **kwargs: Any) -> AsyncArray:
+    """Create an array filled with `data`.
+
+    Parameters
+    ----------
+    data : array_like
+        The data to fill the array with.
+    kwargs
+        Passed through to :func:`create`.
+
+    Returns
+    -------
+    array : array
+        The new array.
+ """ + + # ensure data is array-like + if not hasattr(data, "shape") or not hasattr(data, "dtype"): + data = np.asanyarray(data) + + # setup dtype + kw_dtype = kwargs.get("dtype") + if kw_dtype is None: + kwargs["dtype"] = data.dtype + else: + kwargs["dtype"] = kw_dtype + + # setup shape and chunks + data_shape, data_chunks = _get_shape_chunks(data) + kwargs["shape"] = data_shape + kw_chunks = kwargs.get("chunks") + if kw_chunks is None: + kwargs["chunks"] = data_chunks + else: + kwargs["chunks"] = kw_chunks + + read_only = kwargs.pop("read_only", False) + if read_only: + raise ValueError("read_only=True is no longer supported when creating new arrays") + + # instantiate array + z = await create(**kwargs) + + # fill with data + await z.setitem(slice(None), data) + + return z + + +async def group( + *, # Note: this is a change from v2 + store: StoreLike | None = None, + overwrite: bool = False, + chunk_store: StoreLike | None = None, # not used + cache_attrs: bool | None = None, # not used, default changed + synchronizer: Any | None = None, # not used + path: str | None = None, + zarr_version: ZarrFormat | None = None, # deprecated + zarr_format: ZarrFormat | None = None, + meta_array: Any | None = None, # not used + attributes: dict[str, JSON] | None = None, +) -> AsyncGroup: + """Create a group. + + Parameters + ---------- + store : Store or string, optional + Store or path to directory in file system. + overwrite : bool, optional + If True, delete any pre-existing data in `store` at `path` before + creating the group. + chunk_store : Store, optional + Separate storage for chunks. If not provided, `store` will be used + for storage of both chunks and metadata. + cache_attrs : bool, optional + If True (default), user attributes will be cached for attribute read + operations. If False, user attributes are reloaded from the store prior + to all attribute read operations. + synchronizer : object, optional + Array synchronizer. + path : string, optional + Group path within store. + meta_array : array-like, optional + An array instance to use for determining arrays to create and return + to users. Use `numpy.empty(())` by default. + zarr_format : {2, 3, None}, optional + The zarr format to use when saving. + + Returns + ------- + g : group + The new group. 
+ """ + + zarr_format = ( + _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format) + or _default_zarr_version() + ) + + store_path = make_store_path(store) + if path is not None: + store_path = store_path / path + + if chunk_store is not None: + warnings.warn("chunk_store is not yet implemented", RuntimeWarning, stacklevel=2) + if cache_attrs is not None: + warnings.warn("cache_attrs is not yet implemented", RuntimeWarning, stacklevel=2) + if synchronizer is not None: + warnings.warn("synchronizer is not yet implemented", RuntimeWarning, stacklevel=2) + if meta_array is not None: + warnings.warn("meta_array is not yet implemented", RuntimeWarning, stacklevel=2) + + if attributes is None: + attributes = {} + + try: + return await AsyncGroup.open(store=store_path, zarr_format=zarr_format) + except (KeyError, FileNotFoundError): + return await AsyncGroup.create( + store=store_path, + zarr_format=zarr_format, + exists_ok=overwrite, + attributes=attributes, + ) + + +async def open_group( + *, # Note: this is a change from v2 + store: StoreLike | None = None, + mode: OpenMode | None = None, # not used + cache_attrs: bool | None = None, # not used, default changed + synchronizer: Any = None, # not used + path: str | None = None, + chunk_store: StoreLike | None = None, # not used + storage_options: dict[str, Any] | None = None, # not used + zarr_version: ZarrFormat | None = None, # deprecated + zarr_format: ZarrFormat | None = None, + meta_array: Any | None = None, # not used + attributes: dict[str, JSON] | None = None, +) -> AsyncGroup: + """Open a group using file-mode-like semantics. + + Parameters + ---------- + store : Store or string, optional + Store or path to directory in file system or name of zip file. + mode : {'r', 'r+', 'a', 'w', 'w-'}, optional + Persistence mode: 'r' means read only (must exist); 'r+' means + read/write (must exist); 'a' means read/write (create if doesn't + exist); 'w' means create (overwrite if exists); 'w-' means create + (fail if exists). + cache_attrs : bool, optional + If True (default), user attributes will be cached for attribute read + operations. If False, user attributes are reloaded from the store prior + to all attribute read operations. + synchronizer : object, optional + Array synchronizer. + path : string, optional + Group path within store. + chunk_store : Store or string, optional + Store or path to directory in file system or name of zip file. + storage_options : dict + If using an fsspec URL to create the store, these will be passed to + the backend implementation. Ignored otherwise. + meta_array : array-like, optional + An array instance to use for determining arrays to create and return + to users. Use `numpy.empty(())` by default. + + Returns + ------- + g : group + The new group. 
+ """ + + zarr_format = ( + _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format) + or _default_zarr_version() + ) + + if cache_attrs is not None: + warnings.warn("cache_attrs is not yet implemented", RuntimeWarning, stacklevel=2) + if synchronizer is not None: + warnings.warn("synchronizer is not yet implemented", RuntimeWarning, stacklevel=2) + if meta_array is not None: + warnings.warn("meta_array is not yet implemented", RuntimeWarning, stacklevel=2) + if chunk_store is not None: + warnings.warn("chunk_store is not yet implemented", RuntimeWarning, stacklevel=2) + if storage_options is not None: + warnings.warn("storage_options is not yet implemented", RuntimeWarning, stacklevel=2) + + store_path = make_store_path(store, mode=mode) + if path is not None: + store_path = store_path / path + + if attributes is None: + attributes = {} + + try: + return await AsyncGroup.open(store_path, zarr_format=zarr_format) + except (KeyError, FileNotFoundError): + return await AsyncGroup.create( + store_path, zarr_format=zarr_format, exists_ok=True, attributes=attributes + ) + + +async def create( + shape: ChunkCoords, + *, # Note: this is a change from v2 + chunks: ChunkCoords | None = None, # TODO: v2 allowed chunks=True + dtype: npt.DTypeLike | None = None, + compressor: dict[str, JSON] | None = None, # TODO: default and type change + fill_value: Any = 0, # TODO: need type + order: MemoryOrder | None = None, # TODO: default change + store: str | StoreLike | None = None, + synchronizer: Any | None = None, + overwrite: bool = False, + path: PathLike | None = None, + chunk_store: StoreLike | None = None, + filters: list[dict[str, JSON]] | None = None, # TODO: type has changed + cache_metadata: bool | None = None, + cache_attrs: bool | None = None, + read_only: bool | None = None, + object_codec: Codec | None = None, # TODO: type has changed + dimension_separator: Literal[".", "/"] | None = None, + write_empty_chunks: bool = False, # TODO: default has changed + zarr_version: ZarrFormat | None = None, # deprecated + zarr_format: ZarrFormat | None = None, + meta_array: Any | None = None, # TODO: need type + attributes: dict[str, JSON] | None = None, + # v3 only + chunk_shape: ChunkCoords | None = None, + chunk_key_encoding: ( + ChunkKeyEncoding + | tuple[Literal["default"], Literal[".", "/"]] + | tuple[Literal["v2"], Literal[".", "/"]] + | None + ) = None, + codecs: Iterable[Codec | dict[str, JSON]] | None = None, + dimension_names: Iterable[str] | None = None, + **kwargs: Any, +) -> AsyncArray: + """Create an array. + + Parameters + ---------- + shape : int or tuple of ints + Array shape. + chunks : int or tuple of ints, optional + Chunk shape. If True, will be guessed from `shape` and `dtype`. If + False, will be set to `shape`, i.e., single chunk for the whole array. + If an int, the chunk size in each dimension will be given by the value + of `chunks`. Default is True. + dtype : string or dtype, optional + NumPy dtype. + compressor : Codec, optional + Primary compressor. + fill_value : object + Default value to use for uninitialized portions of the array. + order : {'C', 'F'}, optional + Memory layout to be used within each chunk. + store : Store or string + Store or path to directory in file system or name of zip file. + synchronizer : object, optional + Array synchronizer. + overwrite : bool, optional + If True, delete all pre-existing data in `store` at `path` before + creating the array. + path : string, optional + Path under which array is stored. 
+ chunk_store : MutableMapping, optional + Separate storage for chunks. If not provided, `store` will be used + for storage of both chunks and metadata. + filters : sequence of Codecs, optional + Sequence of filters to use to encode chunk data prior to compression. + cache_metadata : bool, optional + If True, array configuration metadata will be cached for the + lifetime of the object. If False, array metadata will be reloaded + prior to all data access and modification operations (may incur + overhead depending on storage and data access pattern). + cache_attrs : bool, optional + If True (default), user attributes will be cached for attribute read + operations. If False, user attributes are reloaded from the store prior + to all attribute read operations. + read_only : bool, optional + True if array should be protected against modification. + object_codec : Codec, optional + A codec to encode object arrays, only needed if dtype=object. + dimension_separator : {'.', '/'}, optional + Separator placed between the dimensions of a chunk. + + .. versionadded:: 2.8 + + write_empty_chunks : bool, optional + If True (default), all chunks will be stored regardless of their + contents. If False, each chunk is compared to the array's fill value + prior to storing. If a chunk is uniformly equal to the fill value, then + that chunk is not be stored, and the store entry for that chunk's key + is deleted. This setting enables sparser storage, as only chunks with + non-fill-value data are stored, at the expense of overhead associated + with checking the data of each chunk. + + .. versionadded:: 2.11 + + zarr_format : {2, 3, None}, optional + The zarr format to use when saving. + meta_array : array-like, optional + An array instance to use for determining arrays to create and return + to users. Use `numpy.empty(())` by default. + + .. versionadded:: 2.13 + + Returns + ------- + z : array + The array. 
+ """ + zarr_format = ( + _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format) + or _default_zarr_version() + ) + + if zarr_format == 2 and chunks is None: + chunks = shape + if zarr_format == 3 and chunk_shape is None: + chunk_shape = shape + + if order is not None: + warnings.warn( + "order is deprecated, use config `array.order` instead", + DeprecationWarning, + stacklevel=2, + ) + if synchronizer is not None: + warnings.warn("synchronizer is not yet implemented", RuntimeWarning, stacklevel=2) + if chunk_store is not None: + warnings.warn("chunk_store is not yet implemented", RuntimeWarning, stacklevel=2) + if cache_metadata is not None: + warnings.warn("cache_metadata is not yet implemented", RuntimeWarning, stacklevel=2) + if cache_attrs is not None: + warnings.warn("cache_attrs is not yet implemented", RuntimeWarning, stacklevel=2) + if object_codec is not None: + warnings.warn("object_codec is not yet implemented", RuntimeWarning, stacklevel=2) + if dimension_separator is not None: + if zarr_format == 3: + raise ValueError( + "dimension_separator is not supported for zarr format 3, use chunk_key_encoding instead" + ) + else: + warnings.warn( + "dimension_separator is not yet implemented", RuntimeWarning, stacklevel=2 + ) + if write_empty_chunks: + warnings.warn("write_empty_chunks is not yet implemented", RuntimeWarning, stacklevel=2) + if meta_array is not None: + warnings.warn("meta_array is not yet implemented", RuntimeWarning, stacklevel=2) + + mode = cast(OpenMode, "r" if read_only else "w") + store_path = make_store_path(store, mode=mode) + if path is not None: + store_path = store_path / path + + return await AsyncArray.create( + store_path, + shape=shape, + chunks=chunks, + dtype=dtype, + compressor=compressor, + fill_value=fill_value, + exists_ok=overwrite, # TODO: name change + filters=filters, + dimension_separator=dimension_separator, + zarr_format=zarr_format, + chunk_shape=chunk_shape, + chunk_key_encoding=chunk_key_encoding, + codecs=codecs, + dimension_names=dimension_names, + attributes=attributes, + **kwargs, + ) + + +async def empty(shape: ChunkCoords, **kwargs: Any) -> AsyncArray: + """Create an empty array. + + Parameters + ---------- + shape : int or tuple of int + Shape of the empty array. + **kwargs + Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + + Notes + ----- + The contents of an empty Zarr array are not defined. On attempting to + retrieve data from an empty Zarr array, any values may be returned, + and these are not guaranteed to be stable from one access to the next. + """ + return await create(shape=shape, fill_value=None, **kwargs) + + +async def empty_like(a: ArrayLike, **kwargs: Any) -> AsyncArray: + """Create an empty array like `a`. + + Parameters + ---------- + a : array-like + The array to create an empty array like. + **kwargs + Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + + Returns + ------- + Array + The new array. + """ + like_kwargs = _like_args(a, kwargs) + return await empty(**like_kwargs) + + +# TODO: add type annotations for fill_value and kwargs +async def full(shape: ChunkCoords, fill_value: Any, **kwargs: Any) -> AsyncArray: + """Create an array, with `fill_value` being used as the default value for + uninitialized portions of the array. + + Parameters + ---------- + shape : int or tuple of int + Shape of the empty array. + fill_value : scalar + Fill value. + **kwargs + Keyword arguments passed to :func:`zarr.api.asynchronous.create`. 
+
+    Returns
+    -------
+    Array
+        The new array.
+    """
+    return await create(shape=shape, fill_value=fill_value, **kwargs)
+
+
+# TODO: add type annotations for kwargs
+async def full_like(a: ArrayLike, **kwargs: Any) -> AsyncArray:
+    """Create a filled array like `a`.
+
+    Parameters
+    ----------
+    a : array-like
+        The array to create an empty array like.
+    **kwargs
+        Keyword arguments passed to :func:`zarr.api.asynchronous.create`.
+
+    Returns
+    -------
+    Array
+        The new array.
+    """
+    like_kwargs = _like_args(a, kwargs)
+    if isinstance(a, AsyncArray):
+        like_kwargs.setdefault("fill_value", a.metadata.fill_value)
+    return await full(**like_kwargs)
+
+
+async def ones(shape: ChunkCoords, **kwargs: Any) -> AsyncArray:
+    """Create an array, with one being used as the default value for
+    uninitialized portions of the array.
+
+    Parameters
+    ----------
+    shape : int or tuple of int
+        Shape of the empty array.
+    **kwargs
+        Keyword arguments passed to :func:`zarr.api.asynchronous.create`.
+
+    Returns
+    -------
+    Array
+        The new array.
+    """
+    return await create(shape=shape, fill_value=1, **kwargs)
+
+
+async def ones_like(a: ArrayLike, **kwargs: Any) -> AsyncArray:
+    """Create an array of ones like `a`.
+
+    Parameters
+    ----------
+    a : array-like
+        The array to create an empty array like.
+    **kwargs
+        Keyword arguments passed to :func:`zarr.api.asynchronous.create`.
+
+    Returns
+    -------
+    Array
+        The new array.
+    """
+    like_kwargs = _like_args(a, kwargs)
+    return await ones(**like_kwargs)
+
+
+async def open_array(
+    *,  # note: this is a change from v2
+    store: StoreLike | None = None,
+    zarr_version: ZarrFormat | None = None,  # deprecated
+    zarr_format: ZarrFormat | None = None,
+    path: PathLike | None = None,
+    **kwargs: Any,  # TODO: type kwargs as valid args to save
+) -> AsyncArray:
+    """Open an array using file-mode-like semantics.
+
+    Parameters
+    ----------
+    store : Store or string
+        Store or path to directory in file system or name of zip file.
+    zarr_format : {2, 3, None}, optional
+        The zarr format to use when saving.
+    path : string, optional
+        Path in store to array.
+    **kwargs
+        Any keyword arguments to pass to the array constructor.
+
+    Returns
+    -------
+    AsyncArray
+        The opened array.
+    """
+
+    store_path = make_store_path(store)
+    if path is not None:
+        store_path = store_path / path
+
+    zarr_format = _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format)
+
+    try:
+        return await AsyncArray.open(store_path, zarr_format=zarr_format)
+    except KeyError as e:
+        if store_path.store.writeable:
+            pass
+        else:
+            raise e
+
+    # if array was not found, create it
+    return await create(store=store, path=path, zarr_format=zarr_format, **kwargs)
+
+
+async def open_like(a: ArrayLike, path: str, **kwargs: Any) -> AsyncArray:
+    """Open a persistent array like `a`.
+
+    Parameters
+    ----------
+    a : Array
+        The shape and data-type of a define these same attributes of the returned array.
+    path : str
+        The path to the new array.
+    **kwargs
+        Any keyword arguments to pass to the array constructor.
+
+    Returns
+    -------
+    AsyncArray
+        The opened array.
+    """
+    like_kwargs = _like_args(a, kwargs)
+    if isinstance(a, (AsyncArray | Array)):
+        like_kwargs.setdefault("fill_value", a.metadata.fill_value)
+    return await open_array(path=path, **like_kwargs)
+
+
+async def zeros(shape: ChunkCoords, **kwargs: Any) -> AsyncArray:
+    """Create an array, with zero being used as the default value for
+    uninitialized portions of the array.
+ + Parameters + ---------- + shape : int or tuple of int + Shape of the empty array. + **kwargs + Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + + Returns + ------- + Array + The new array. + """ + return await create(shape=shape, fill_value=0, **kwargs) + + +async def zeros_like(a: ArrayLike, **kwargs: Any) -> AsyncArray: + """Create an array of zeros like `a`. + + Parameters + ---------- + a : array-like + The array to create an empty array like. + **kwargs + Keyword arguments passed to :func:`zarr.api.asynchronous.create`. + + Returns + ------- + Array + The new array. + """ + like_kwargs = _like_args(a, kwargs) + return await zeros(**like_kwargs) diff --git a/src/zarr/api/synchronous.py b/src/zarr/api/synchronous.py new file mode 100644 index 0000000000..57b9d5630f --- /dev/null +++ b/src/zarr/api/synchronous.py @@ -0,0 +1,273 @@ +from __future__ import annotations + +from typing import Any + +import zarr.api.asynchronous as async_api +from zarr.array import Array, AsyncArray +from zarr.buffer import NDArrayLike +from zarr.common import JSON, ChunkCoords, OpenMode, ZarrFormat +from zarr.group import Group +from zarr.store import StoreLike +from zarr.sync import sync + + +def consolidate_metadata(*args: Any, **kwargs: Any) -> Group: + return Group(sync(async_api.consolidate_metadata(*args, **kwargs))) + + +def copy(*args: Any, **kwargs: Any) -> tuple[int, int, int]: + return sync(async_api.copy(*args, **kwargs)) + + +def copy_all(*args: Any, **kwargs: Any) -> tuple[int, int, int]: + return sync(async_api.copy_all(*args, **kwargs)) + + +def copy_store(*args: Any, **kwargs: Any) -> tuple[int, int, int]: + return sync(async_api.copy_store(*args, **kwargs)) + + +def load( + store: StoreLike, zarr_version: ZarrFormat | None = None, path: str | None = None +) -> NDArrayLike | dict[str, NDArrayLike]: + return sync(async_api.load(store=store, zarr_version=zarr_version, path=path)) + + +def open( + *, + store: StoreLike | None = None, + mode: OpenMode | None = None, # type and value changed + zarr_version: ZarrFormat | None = None, # deprecated + zarr_format: ZarrFormat | None = None, + path: str | None = None, + **kwargs: Any, # TODO: type kwargs as valid args to async_api.open +) -> Array | Group: + obj = sync( + async_api.open( + store=store, + mode=mode, + zarr_version=zarr_version, + zarr_format=zarr_format, + path=path, + **kwargs, + ) + ) + if isinstance(obj, AsyncArray): + return Array(obj) + else: + return Group(obj) + + +def open_consolidated(*args: Any, **kwargs: Any) -> Group: + return Group(sync(async_api.open_consolidated(*args, **kwargs))) + + +def save( + store: StoreLike, + *args: NDArrayLike, + zarr_version: ZarrFormat | None = None, # deprecated + zarr_format: ZarrFormat | None = None, + path: str | None = None, + **kwargs: Any, # TODO: type kwargs as valid args to async_api.save +) -> None: + return sync( + async_api.save( + store, *args, zarr_version=zarr_version, zarr_format=zarr_format, path=path, **kwargs + ) + ) + + +def save_array( + store: StoreLike, + arr: NDArrayLike, + *, + zarr_version: ZarrFormat | None = None, # deprecated + zarr_format: ZarrFormat | None = None, + path: str | None = None, + **kwargs: Any, # TODO: type kwargs as valid args to async_api.save_array +) -> None: + return sync( + async_api.save_array( + store=store, + arr=arr, + zarr_version=zarr_version, + zarr_format=zarr_format, + path=path, + **kwargs, + ) + ) + + +def save_group( + store: StoreLike, + *args: NDArrayLike, + zarr_version: ZarrFormat | None = None, # deprecated 
+ zarr_format: ZarrFormat | None = None, + path: str | None = None, + **kwargs: NDArrayLike, +) -> None: + return sync( + async_api.save_group( + store, + *args, + zarr_version=zarr_version, + zarr_format=zarr_format, + path=path, + **kwargs, + ) + ) + + +def tree(*args: Any, **kwargs: Any) -> None: + return sync(async_api.tree(*args, **kwargs)) + + +# TODO: add type annotations for kwargs +def array(data: NDArrayLike, **kwargs: Any) -> Array: + return Array(sync(async_api.array(data=data, **kwargs))) + + +def group( + *, # Note: this is a change from v2 + store: StoreLike | None = None, + overwrite: bool = False, + chunk_store: StoreLike | None = None, # not used in async_api + cache_attrs: bool | None = None, # default changed, not used in async_api + synchronizer: Any | None = None, # not used in async_api + path: str | None = None, + zarr_version: ZarrFormat | None = None, # deprecated + zarr_format: ZarrFormat | None = None, + meta_array: Any | None = None, # not used in async_api + attributes: dict[str, JSON] | None = None, +) -> Group: + return Group( + sync( + async_api.group( + store=store, + overwrite=overwrite, + chunk_store=chunk_store, + cache_attrs=cache_attrs, + synchronizer=synchronizer, + path=path, + zarr_version=zarr_version, + zarr_format=zarr_format, + meta_array=meta_array, + attributes=attributes, + ) + ) + ) + + +def open_group( + *, # Note: this is a change from v2 + store: StoreLike | None = None, + mode: OpenMode | None = None, # not used in async api + cache_attrs: bool | None = None, # default changed, not used in async api + synchronizer: Any = None, # not used in async api + path: str | None = None, + chunk_store: StoreLike | None = None, # not used in async api + storage_options: dict[str, Any] | None = None, # not used in async api + zarr_version: ZarrFormat | None = None, # deprecated + zarr_format: ZarrFormat | None = None, + meta_array: Any | None = None, # not used in async api +) -> Group: + return Group( + sync( + async_api.open_group( + store=store, + mode=mode, + cache_attrs=cache_attrs, + synchronizer=synchronizer, + path=path, + chunk_store=chunk_store, + storage_options=storage_options, + zarr_version=zarr_version, + zarr_format=zarr_format, + meta_array=meta_array, + ) + ) + ) + + +# TODO: add type annotations for kwargs +def create(*args: Any, **kwargs: Any) -> Array: + return Array(sync(async_api.create(*args, **kwargs))) + + +# TODO: add type annotations for kwargs +def empty(shape: ChunkCoords, **kwargs: Any) -> Array: + return Array(sync(async_api.empty(shape, **kwargs))) + + +# TODO: move ArrayLike to common module +# TODO: add type annotations for kwargs +def empty_like(a: async_api.ArrayLike, **kwargs: Any) -> Array: + return Array(sync(async_api.empty_like(a, **kwargs))) + + +# TODO: add type annotations for kwargs and fill_value +def full(shape: ChunkCoords, fill_value: Any, **kwargs: Any) -> Array: + return Array(sync(async_api.full(shape=shape, fill_value=fill_value, **kwargs))) + + +# TODO: move ArrayLike to common module +# TODO: add type annotations for kwargs +def full_like(a: async_api.ArrayLike, **kwargs: Any) -> Array: + return Array(sync(async_api.full_like(a, **kwargs))) + + +# TODO: add type annotations for kwargs +def ones(shape: ChunkCoords, **kwargs: Any) -> Array: + return Array(sync(async_api.ones(shape, **kwargs))) + + +# TODO: add type annotations for kwargs +def ones_like(a: async_api.ArrayLike, **kwargs: Any) -> Array: + return Array(sync(async_api.ones_like(a, **kwargs))) + + +# TODO: update this once 
async_api.open_array is fully implemented
+def open_array(*args: Any, **kwargs: Any) -> Array:
+    return Array(sync(async_api.open_array(*args, **kwargs)))
+
+
+# TODO: add type annotations for kwargs
+def open_like(a: async_api.ArrayLike, **kwargs: Any) -> Array:
+    return Array(sync(async_api.open_like(a, **kwargs)))
+
+
+# TODO: add type annotations for kwargs
+def zeros(*args: Any, **kwargs: Any) -> Array:
+    return Array(sync(async_api.zeros(*args, **kwargs)))
+
+
+# TODO: add type annotations for kwargs
+def zeros_like(a: async_api.ArrayLike, **kwargs: Any) -> Array:
+    return Array(sync(async_api.zeros_like(a, **kwargs)))
+
+
+consolidate_metadata.__doc__ = async_api.consolidate_metadata.__doc__
+copy.__doc__ = async_api.copy.__doc__
+copy_all.__doc__ = async_api.copy_all.__doc__
+copy_store.__doc__ = async_api.copy_store.__doc__
+load.__doc__ = async_api.load.__doc__
+open.__doc__ = async_api.open.__doc__
+open_consolidated.__doc__ = async_api.open_consolidated.__doc__
+save.__doc__ = async_api.save.__doc__
+save_array.__doc__ = async_api.save_array.__doc__
+save_group.__doc__ = async_api.save_group.__doc__
+tree.__doc__ = async_api.tree.__doc__
+array.__doc__ = async_api.array.__doc__
+group.__doc__ = async_api.group.__doc__
+open_group.__doc__ = async_api.open_group.__doc__
+create.__doc__ = async_api.create.__doc__
+empty.__doc__ = async_api.empty.__doc__
+empty_like.__doc__ = async_api.empty_like.__doc__
+full.__doc__ = async_api.full.__doc__
+full_like.__doc__ = async_api.full_like.__doc__
+ones.__doc__ = async_api.ones.__doc__
+ones_like.__doc__ = async_api.ones_like.__doc__
+open_array.__doc__ = async_api.open_array.__doc__
+open_like.__doc__ = async_api.open_like.__doc__
+zeros.__doc__ = async_api.zeros.__doc__
+zeros_like.__doc__ = async_api.zeros_like.__doc__
diff --git a/src/zarr/array.py b/src/zarr/array.py
index 28b19f44f0..9ac1ce41ec 100644
--- a/src/zarr/array.py
+++ b/src/zarr/array.py
@@ -387,6 +387,10 @@ def dtype(self) -> np.dtype[Any]:
     def attrs(self) -> dict[str, JSON]:
         return self.metadata.attributes
 
+    @property
+    def read_only(self) -> bool:
+        return not self.store_path.store.writeable
+
     @property
     def path(self) -> str:
         """Storage path."""
@@ -693,6 +697,10 @@ def store_path(self) -> StorePath:
     def order(self) -> Literal["C", "F"]:
         return self._async_array.order
 
+    @property
+    def read_only(self) -> bool:
+        return self._async_array.read_only
+
     def __getitem__(self, selection: Selection) -> NDArrayLike:
         fields, pure_selection = pop_fields(selection)
         if is_pure_fancy_indexing(pure_selection, self.ndim):
diff --git a/src/zarr/common.py b/src/zarr/common.py
index bca9f171af..9349f9f018 100644
--- a/src/zarr/common.py
+++ b/src/zarr/common.py
@@ -34,6 +34,7 @@ Selection = slice | SliceSelection
 ZarrFormat = Literal[2, 3]
 JSON = None | str | int | float | Enum | dict[str, "JSON"] | list["JSON"] | tuple["JSON", ...]
+MemoryOrder = Literal["C", "F"]
 OpenMode = Literal["r", "r+", "a", "w", "w-"]
 
 
@@ -134,7 +135,9 @@ def parse_named_configuration(
     return name_parsed, configuration_parsed
 
 
-def parse_shapelike(data: Iterable[int]) -> tuple[int, ...]:
+def parse_shapelike(data: int | Iterable[int]) -> tuple[int, ...]:
+    if isinstance(data, int):
+        return (data,)
     if not isinstance(data, Iterable):
         raise TypeError(f"Expected an iterable.
Got {data} instead.") data_tuple = tuple(data) diff --git a/src/zarr/convenience.py b/src/zarr/convenience.py new file mode 100644 index 0000000000..be0a6b2813 --- /dev/null +++ b/src/zarr/convenience.py @@ -0,0 +1,35 @@ +import warnings + +from zarr.api.synchronous import ( + consolidate_metadata, + copy, + copy_all, + copy_store, + load, + open, + open_consolidated, + save, + save_array, + save_group, + tree, +) + +warnings.warn( + "zarr.convenience is deprecated, use zarr.api.synchronous", + DeprecationWarning, + stacklevel=2, +) + +__all__ = [ + "open", + "save_array", + "save_group", + "save", + "load", + "tree", + "copy_store", + "copy", + "copy_all", + "consolidate_metadata", + "open_consolidated", +] diff --git a/src/zarr/creation.py b/src/zarr/creation.py new file mode 100644 index 0000000000..df3f764610 --- /dev/null +++ b/src/zarr/creation.py @@ -0,0 +1,37 @@ +import warnings + +from zarr.api.synchronous import ( + array, + create, + empty, + empty_like, + full, + full_like, + ones, + ones_like, + open_array, + open_like, + zeros, + zeros_like, +) + +warnings.warn( + "zarr.creation is deprecated, use zarr.api.synchronous", + DeprecationWarning, + stacklevel=2, +) + +__all__ = [ + "create", + "empty", + "zeros", + "ones", + "full", + "array", + "open_array", + "empty_like", + "zeros_like", + "ones_like", + "full_like", + "open_like", +] diff --git a/src/zarr/group.py b/src/zarr/group.py index ccad0e5661..4bb4b6b4dd 100644 --- a/src/zarr/group.py +++ b/src/zarr/group.py @@ -200,7 +200,7 @@ async def getitem( key: str, ) -> AsyncArray | AsyncGroup: store_path = self.store_path / key - logger.warning("key=%s, store_path=%s", key, store_path) + logger.debug("key=%s, store_path=%s", key, store_path) # Note: # in zarr-python v2, we first check if `key` references an Array, else if `key` references @@ -316,7 +316,7 @@ async def create_array( self, path: str, shape: ChunkCoords, - dtype: npt.DTypeLike, + dtype: npt.DTypeLike = "float64", fill_value: Any | None = None, attributes: dict[str, JSON] | None = None, # v3 only diff --git a/src/zarr/metadata.py b/src/zarr/metadata.py index bcb70bd4b2..8329bd9200 100644 --- a/src/zarr/metadata.py +++ b/src/zarr/metadata.py @@ -29,6 +29,7 @@ ZARRAY_JSON, ZATTRS_JSON, ChunkCoords, + ZarrFormat, parse_dtype, parse_fill_value, parse_shapelike, @@ -115,9 +116,10 @@ def from_dtype(cls, dtype: np.dtype[Any]) -> DataType: @dataclass(frozen=True, kw_only=True) class ArrayMetadata(Metadata, ABC): shape: ChunkCoords - chunk_grid: ChunkGrid fill_value: Any + chunk_grid: ChunkGrid attributes: dict[str, JSON] + zarr_format: ZarrFormat @property @abstractmethod diff --git a/src/zarr/store/__init__.py b/src/zarr/store/__init__.py index b7cd6cc0fd..fbdcdb9255 100644 --- a/src/zarr/store/__init__.py +++ b/src/zarr/store/__init__.py @@ -1,7 +1,6 @@ -# flake8: noqa -from zarr.store.core import StorePath, StoreLike, make_store_path -from zarr.store.remote import RemoteStore +from zarr.store.core import StoreLike, StorePath, make_store_path from zarr.store.local import LocalStore from zarr.store.memory import MemoryStore +from zarr.store.remote import RemoteStore __all__ = ["StorePath", "StoreLike", "make_store_path", "RemoteStore", "LocalStore", "MemoryStore"] diff --git a/src/zarr/store/core.py b/src/zarr/store/core.py index 70c39db1b7..5fbde208f4 100644 --- a/src/zarr/store/core.py +++ b/src/zarr/store/core.py @@ -7,6 +7,7 @@ from zarr.buffer import Buffer, BufferPrototype, default_buffer_prototype from zarr.common import OpenMode from zarr.store.local import 
LocalStore +from zarr.store.memory import MemoryStore def _dereference_path(root: str, path: str) -> str: @@ -65,7 +66,7 @@ def __eq__(self, other: Any) -> bool: StoreLike = Store | StorePath | Path | str -def make_store_path(store_like: StoreLike, *, mode: OpenMode | None = None) -> StorePath: +def make_store_path(store_like: StoreLike | None, *, mode: OpenMode | None = None) -> StorePath: if isinstance(store_like, StorePath): if mode is not None: assert mode == store_like.store.mode @@ -74,31 +75,10 @@ def make_store_path(store_like: StoreLike, *, mode: OpenMode | None = None) -> S if mode is not None: assert mode == store_like.mode return StorePath(store_like) + elif store_like is None: + if mode is None: + mode = "w" # exception to the default mode = 'r' + return StorePath(MemoryStore(mode=mode)) elif isinstance(store_like, str): - assert mode is not None - return StorePath(LocalStore(Path(store_like), mode=mode)) + return StorePath(LocalStore(Path(store_like), mode=mode or "r")) raise TypeError - - -def _normalize_interval_index( - data: Buffer, interval: None | tuple[int | None, int | None] -) -> tuple[int, int]: - """ - Convert an implicit interval into an explicit start and length - """ - if interval is None: - start = 0 - length = len(data) - else: - maybe_start, maybe_len = interval - if maybe_start is None: - start = 0 - else: - start = maybe_start - - if maybe_len is None: - length = len(data) - start - else: - length = maybe_len - - return (start, length) diff --git a/src/zarr/store/memory.py b/src/zarr/store/memory.py index d75e8c348c..43d65ce836 100644 --- a/src/zarr/store/memory.py +++ b/src/zarr/store/memory.py @@ -5,7 +5,7 @@ from zarr.abc.store import Store from zarr.buffer import Buffer, BufferPrototype from zarr.common import OpenMode, concurrent_map -from zarr.store.core import _normalize_interval_index +from zarr.store.utils import _normalize_interval_index # TODO: this store could easily be extended to wrap any MutableMapping store from v2 diff --git a/src/zarr/store/utils.py b/src/zarr/store/utils.py new file mode 100644 index 0000000000..17c9234221 --- /dev/null +++ b/src/zarr/store/utils.py @@ -0,0 +1,25 @@ +from zarr.buffer import Buffer + + +def _normalize_interval_index( + data: Buffer, interval: None | tuple[int | None, int | None] +) -> tuple[int, int]: + """ + Convert an implicit interval into an explicit start and length + """ + if interval is None: + start = 0 + length = len(data) + else: + maybe_start, maybe_len = interval + if maybe_start is None: + start = 0 + else: + start = maybe_start + + if maybe_len is None: + length = len(data) - start + else: + length = maybe_len + + return (start, length) diff --git a/src/zarr/testing/store.py b/src/zarr/testing/store.py index 5929f47049..3c7f082099 100644 --- a/src/zarr/testing/store.py +++ b/src/zarr/testing/store.py @@ -4,7 +4,7 @@ from zarr.abc.store import Store from zarr.buffer import Buffer, default_buffer_prototype -from zarr.store.core import _normalize_interval_index +from zarr.store.utils import _normalize_interval_index from zarr.testing.utils import assert_bytes_equal S = TypeVar("S", bound=Store) diff --git a/tests/v3/test_api.py b/tests/v3/test_api.py new file mode 100644 index 0000000000..31e6fbfcd9 --- /dev/null +++ b/tests/v3/test_api.py @@ -0,0 +1,778 @@ +import numpy as np +import pytest +from numpy.testing import assert_array_equal + +import zarr +from zarr import Array, Group +from zarr.abc.store import Store +from zarr.api.synchronous import load, open, open_group, save, save_array, 
save_group + + +def test_open_array(memory_store: Store) -> None: + store = memory_store + + # open array, create if doesn't exist + z = open(store=store, shape=100) + assert isinstance(z, Array) + assert z.shape == (100,) + + # open array, overwrite + store._store_dict = {} + z = open(store=store, shape=200, mode="w") # mode="w" + assert isinstance(z, Array) + assert z.shape == (200,) + + # open array, read-only + ro_store = type(store)(store_dict=store._store_dict, mode="r") + z = open(store=ro_store) + assert isinstance(z, Array) + assert z.shape == (200,) + assert z.read_only + + # path not found + with pytest.raises(ValueError): + open(store="doesnotexist", mode="r") + + +def test_open_group(memory_store: Store) -> None: + store = memory_store + + # open group, create if doesn't exist + g = open_group(store=store) + g.create_group("foo") + assert isinstance(g, Group) + assert "foo" in g + + # open group, overwrite + # g = open_group(store=store) + # assert isinstance(g, Group) + # assert "foo" not in g + + # open group, read-only + ro_store = type(store)(store_dict=store._store_dict, mode="r") + g = open_group(store=ro_store) + assert isinstance(g, Group) + # assert g.read_only + + +def test_save_errors() -> None: + with pytest.raises(ValueError): + # no arrays provided + save_group("data/group.zarr") + with pytest.raises(TypeError): + # no array provided + save_array("data/group.zarr") + with pytest.raises(ValueError): + # no arrays provided + save("data/group.zarr") + + +# def test_lazy_loader(): +# foo = np.arange(100) +# bar = np.arange(100, 0, -1) +# store = "data/group.zarr" +# save(store, foo=foo, bar=bar) +# loader = load(store) +# assert "foo" in loader +# assert "bar" in loader +# assert "baz" not in loader +# assert len(loader) == 2 +# assert sorted(loader) == ["bar", "foo"] +# assert_array_equal(foo, loader["foo"]) +# assert_array_equal(bar, loader["bar"]) +# assert "LazyLoader: " in repr(loader) + + +def test_load_array(memory_store: Store) -> None: + store = memory_store + foo = np.arange(100) + bar = np.arange(100, 0, -1) + save(store, foo=foo, bar=bar) + + # can also load arrays directly into a numpy array + for array_name in ["foo", "bar"]: + array = load(store, path=array_name) + assert isinstance(array, np.ndarray) + if array_name == "foo": + assert_array_equal(foo, array) + else: + assert_array_equal(bar, array) + + +def test_tree() -> None: + g1 = zarr.group() + g1.create_group("foo") + g3 = g1.create_group("bar") + g3.create_group("baz") + g5 = g3.create_group("qux") + g5.create_array("baz", shape=100, chunks=10) + # TODO: complete after tree has been reimplemented + # assert repr(zarr.tree(g1)) == repr(g1.tree()) + # assert str(zarr.tree(g1)) == str(g1.tree()) + + +# @pytest.mark.parametrize("stores_from_path", [False, True]) +# @pytest.mark.parametrize( +# "with_chunk_store,listable", +# [(False, True), (True, True), (False, False)], +# ids=["default-listable", "with_chunk_store-listable", "default-unlistable"], +# ) +# def test_consolidate_metadata(with_chunk_store, listable, monkeypatch, stores_from_path): +# # setup initial data +# if stores_from_path: +# store = tempfile.mkdtemp() +# atexit.register(atexit_rmtree, store) +# if with_chunk_store: +# chunk_store = tempfile.mkdtemp() +# atexit.register(atexit_rmtree, chunk_store) +# else: +# chunk_store = None +# else: +# store = MemoryStore() +# chunk_store = MemoryStore() if with_chunk_store else None +# path = None +# z = group(store, chunk_store=chunk_store, path=path) + +# # Reload the actual store 
implementation in case str +# store_to_copy = z.store + +# z.create_group("g1") +# g2 = z.create_group("g2") +# g2.attrs["hello"] = "world" +# arr = g2.create_array("arr", shape=(20, 20), chunks=(5, 5), dtype="f8") +# assert 16 == arr.nchunks +# assert 0 == arr.nchunks_initialized +# arr.attrs["data"] = 1 +# arr[:] = 1.0 +# assert 16 == arr.nchunks_initialized + +# if stores_from_path: +# # get the actual store class for use with consolidate_metadata +# store_class = z._store +# else: +# store_class = store + +# # perform consolidation +# out = consolidate_metadata(store_class, path=path) +# assert isinstance(out, Group) +# assert ["g1", "g2"] == list(out) +# if not stores_from_path: +# assert isinstance(out._store, ConsolidatedMetadataStore) +# assert ".zmetadata" in store +# meta_keys = [ +# ".zgroup", +# "g1/.zgroup", +# "g2/.zgroup", +# "g2/.zattrs", +# "g2/arr/.zarray", +# "g2/arr/.zattrs", +# ] + +# for key in meta_keys: +# del store[key] + +# # https://github.com/zarr-developers/zarr-python/issues/993 +# # Make sure we can still open consolidated on an unlistable store: +# if not listable: +# fs_memory = pytest.importorskip("fsspec.implementations.memory") +# monkeypatch.setattr(fs_memory.MemoryFileSystem, "isdir", lambda x, y: False) +# monkeypatch.delattr(fs_memory.MemoryFileSystem, "ls") +# fs = fs_memory.MemoryFileSystem() +# store_to_open = FSStore("", fs=fs) +# # copy original store to new unlistable store +# store_to_open.update(store_to_copy) + +# else: +# store_to_open = store + +# # open consolidated +# z2 = open_consolidated(store_to_open, chunk_store=chunk_store, path=path) +# assert ["g1", "g2"] == list(z2) +# assert "world" == z2.g2.attrs["hello"] +# assert 1 == z2.g2.arr.attrs["data"] +# assert (z2.g2.arr[:] == 1.0).all() +# assert 16 == z2.g2.arr.nchunks +# if listable: +# assert 16 == z2.g2.arr.nchunks_initialized +# else: +# with pytest.raises(NotImplementedError): +# _ = z2.g2.arr.nchunks_initialized + +# if stores_from_path: +# # path string is note a BaseStore subclass so cannot be used to +# # initialize a ConsolidatedMetadataStore. 
+ +# with pytest.raises(ValueError): +# cmd = ConsolidatedMetadataStore(store) +# else: +# # tests del/write on the store + +# cmd = ConsolidatedMetadataStore(store) +# with pytest.raises(PermissionError): +# del cmd[".zgroup"] +# with pytest.raises(PermissionError): +# cmd[".zgroup"] = None + +# # test getsize on the store +# assert isinstance(getsize(cmd), Integral) + +# # test new metadata are not writeable +# with pytest.raises(PermissionError): +# z2.create_group("g3") +# with pytest.raises(PermissionError): +# z2.create_dataset("spam", shape=42, chunks=7, dtype="i4") +# with pytest.raises(PermissionError): +# del z2["g2"] + +# # test consolidated metadata are not writeable +# with pytest.raises(PermissionError): +# z2.g2.attrs["hello"] = "universe" +# with pytest.raises(PermissionError): +# z2.g2.arr.attrs["foo"] = "bar" + +# # test the data are writeable +# z2.g2.arr[:] = 2 +# assert (z2.g2.arr[:] == 2).all() + +# # test invalid modes +# with pytest.raises(ValueError): +# open_consolidated(store, chunk_store=chunk_store, mode="a", path=path) +# with pytest.raises(ValueError): +# open_consolidated(store, chunk_store=chunk_store, mode="w", path=path) +# with pytest.raises(ValueError): +# open_consolidated(store, chunk_store=chunk_store, mode="w-", path=path) + +# # make sure keyword arguments are passed through without error +# open_consolidated( +# store, +# chunk_store=chunk_store, +# path=path, +# cache_attrs=True, +# synchronizer=None, +# ) + + +# @pytest.mark.parametrize( +# "options", +# ( +# {"dimension_separator": "/"}, +# {"dimension_separator": "."}, +# {"dimension_separator": None}, +# ), +# ) +# def test_save_array_separator(tmpdir, options): +# data = np.arange(6).reshape((3, 2)) +# url = tmpdir.join("test.zarr") +# save_array(url, data, **options) + + +# class TestCopyStore(unittest.TestCase): +# _version = 2 + +# def setUp(self): +# source = dict() +# source["foo"] = b"xxx" +# source["bar/baz"] = b"yyy" +# source["bar/qux"] = b"zzz" +# self.source = source + +# def _get_dest_store(self): +# return dict() + +# def test_no_paths(self): +# source = self.source +# dest = self._get_dest_store() +# copy_store(source, dest) +# assert len(source) == len(dest) +# for key in source: +# assert source[key] == dest[key] + +# def test_source_path(self): +# source = self.source +# # paths should be normalized +# for source_path in "bar", "bar/", "/bar", "/bar/": +# dest = self._get_dest_store() +# copy_store(source, dest, source_path=source_path) +# assert 2 == len(dest) +# for key in source: +# if key.startswith("bar/"): +# dest_key = key.split("bar/")[1] +# assert source[key] == dest[dest_key] +# else: +# assert key not in dest + +# def test_dest_path(self): +# source = self.source +# # paths should be normalized +# for dest_path in "new", "new/", "/new", "/new/": +# dest = self._get_dest_store() +# copy_store(source, dest, dest_path=dest_path) +# assert len(source) == len(dest) +# for key in source: +# if self._version == 3: +# dest_key = key[:10] + "new/" + key[10:] +# else: +# dest_key = "new/" + key +# assert source[key] == dest[dest_key] + +# def test_source_dest_path(self): +# source = self.source +# # paths should be normalized +# for source_path in "bar", "bar/", "/bar", "/bar/": +# for dest_path in "new", "new/", "/new", "/new/": +# dest = self._get_dest_store() +# copy_store(source, dest, source_path=source_path, dest_path=dest_path) +# assert 2 == len(dest) +# for key in source: +# if key.startswith("bar/"): +# dest_key = "new/" + key.split("bar/")[1] +# assert 
source[key] == dest[dest_key]
+#                     else:
+#                         assert key not in dest
+#                         assert ("new/" + key) not in dest
+
+#     def test_excludes_includes(self):
+#         source = self.source
+
+#         # single excludes
+#         dest = self._get_dest_store()
+#         excludes = "f.*"
+#         copy_store(source, dest, excludes=excludes)
+#         assert len(dest) == 2
+
+#         root = ""
+#         assert root + "foo" not in dest
+
+#         # multiple excludes
+#         dest = self._get_dest_store()
+#         excludes = "b.z", ".*x"
+#         copy_store(source, dest, excludes=excludes)
+#         assert len(dest) == 1
+#         assert root + "foo" in dest
+#         assert root + "bar/baz" not in dest
+#         assert root + "bar/qux" not in dest
+
+#         # excludes and includes
+#         dest = self._get_dest_store()
+#         excludes = "b.*"
+#         includes = ".*x"
+#         copy_store(source, dest, excludes=excludes, includes=includes)
+#         assert len(dest) == 2
+#         assert root + "foo" in dest
+#         assert root + "bar/baz" not in dest
+#         assert root + "bar/qux" in dest
+
+#     def test_dry_run(self):
+#         source = self.source
+#         dest = self._get_dest_store()
+#         copy_store(source, dest, dry_run=True)
+#         assert 0 == len(dest)
+
+#     def test_if_exists(self):
+#         source = self.source
+#         dest = self._get_dest_store()
+#         root = ""
+#         dest[root + "bar/baz"] = b"mmm"
+
+#         # default ('raise')
+#         with pytest.raises(CopyError):
+#             copy_store(source, dest)
+
+#         # explicit 'raise'
+#         with pytest.raises(CopyError):
+#             copy_store(source, dest, if_exists="raise")
+
+#         # skip
+#         copy_store(source, dest, if_exists="skip")
+#         assert 3 == len(dest)
+#         assert dest[root + "foo"] == b"xxx"
+#         assert dest[root + "bar/baz"] == b"mmm"
+#         assert dest[root + "bar/qux"] == b"zzz"
+
+#         # replace
+#         copy_store(source, dest, if_exists="replace")
+#         assert 3 == len(dest)
+#         assert dest[root + "foo"] == b"xxx"
+#         assert dest[root + "bar/baz"] == b"yyy"
+#         assert dest[root + "bar/qux"] == b"zzz"
+
+#         # invalid option
+#         with pytest.raises(ValueError):
+#             copy_store(source, dest, if_exists="foobar")
+
+
+# def check_copied_array(original, copied, without_attrs=False, expect_props=None):
+#     # setup
+#     source_h5py = original.__module__.startswith("h5py.")
+#     dest_h5py = copied.__module__.startswith("h5py.")
+#     zarr_to_zarr = not (source_h5py or dest_h5py)
+#     h5py_to_h5py = source_h5py and dest_h5py
+#     zarr_to_h5py = not source_h5py and dest_h5py
+#     h5py_to_zarr = source_h5py and not dest_h5py
+#     if expect_props is None:
+#         expect_props = dict()
+#     else:
+#         expect_props = expect_props.copy()
+
+#     # common properties in zarr and h5py
+#     for p in "dtype", "shape", "chunks":
+#         expect_props.setdefault(p, getattr(original, p))
+
+#     # zarr-specific properties
+#     if zarr_to_zarr:
+#         for p in "compressor", "filters", "order", "fill_value":
+#             expect_props.setdefault(p, getattr(original, p))
+
+#     # h5py-specific properties
+#     if h5py_to_h5py:
+#         for p in (
+#             "maxshape",
+#             "compression",
+#             "compression_opts",
+#             "shuffle",
+#             "scaleoffset",
+#             "fletcher32",
+#             "fillvalue",
+#         ):
+#             expect_props.setdefault(p, getattr(original, p))
+
+#     # common properties with some name differences
+#     if h5py_to_zarr:
+#         expect_props.setdefault("fill_value", original.fillvalue)
+#     if zarr_to_h5py:
+#         expect_props.setdefault("fillvalue", original.fill_value)
+
+#     # compare properties
+#     for k, v in expect_props.items():
+#         assert v == getattr(copied, k)
+
+#     # compare data
+#     assert_array_equal(original[:], copied[:])
+
+#     # compare attrs
+#     if without_attrs:
+#         for k in original.attrs.keys():
+#             assert k not in copied.attrs
+#     else:
+#         if dest_h5py and "filters" in original.attrs:
+#             # special case in v3 (storing filters metadata under attributes)
+#             # we explicitly do not copy this info over to HDF5
+#             original_attrs = original.attrs.asdict().copy()
+#             original_attrs.pop("filters")
+#         else:
+#             original_attrs = original.attrs
+#         assert sorted(original_attrs.items()) == sorted(copied.attrs.items())
+
+
+# def check_copied_group(original, copied, without_attrs=False, expect_props=None, shallow=False):
+#     # setup
+#     if expect_props is None:
+#         expect_props = dict()
+#     else:
+#         expect_props = expect_props.copy()
+
+#     # compare children
+#     for k, v in original.items():
+#         if hasattr(v, "shape"):
+#             assert k in copied
+#             check_copied_array(v, copied[k], without_attrs=without_attrs, expect_props=expect_props)
+#         elif shallow:
+#             assert k not in copied
+#         else:
+#             assert k in copied
+#             check_copied_group(
+#                 v,
+#                 copied[k],
+#                 without_attrs=without_attrs,
+#                 shallow=shallow,
+#                 expect_props=expect_props,
+#             )
+
+#     # compare attrs
+#     if without_attrs:
+#         for k in original.attrs.keys():
+#             assert k not in copied.attrs
+#     else:
+#         assert sorted(original.attrs.items()) == sorted(copied.attrs.items())
+
+
+# def test_copy_all():
+#     """
+#     https://github.com/zarr-developers/zarr-python/issues/269
+
+#     copy_all used to not copy attributes as `.keys()` does not return hidden `.zattrs`.
+
+#     """
+#     original_group = zarr.group(store=MemoryStore(), overwrite=True)
+#     original_group.attrs["info"] = "group attrs"
+#     original_subgroup = original_group.create_group("subgroup")
+#     original_subgroup.attrs["info"] = "sub attrs"
+
+#     destination_group = zarr.group(store=MemoryStore(), overwrite=True)
+
+#     # copy from memory to directory store
+#     copy_all(
+#         original_group,
+#         destination_group,
+#         dry_run=False,
+#     )
+
+#     assert "subgroup" in destination_group
+#     assert destination_group.attrs["info"] == "group attrs"
+#     assert destination_group.subgroup.attrs["info"] == "sub attrs"
+
+
+# class TestCopy:
+#     @pytest.fixture(params=[False, True], ids=["zarr", "hdf5"])
+#     def source(self, request, tmpdir):
+#         def prep_source(source):
+#             foo = source.create_group("foo")
+#             foo.attrs["experiment"] = "weird science"
+#             baz = foo.create_dataset("bar/baz", data=np.arange(100), chunks=(50,))
+#             baz.attrs["units"] = "metres"
+#             if request.param:
+#                 extra_kws = dict(
+#                     compression="gzip",
+#                     compression_opts=3,
+#                     fillvalue=84,
+#                     shuffle=True,
+#                     fletcher32=True,
+#                 )
+#             else:
+#                 extra_kws = dict(compressor=Zlib(3), order="F", fill_value=42, filters=[Adler32()])
+#             source.create_dataset(
+#                 "spam",
+#                 data=np.arange(100, 200).reshape(20, 5),
+#                 chunks=(10, 2),
+#                 dtype="i2",
+#                 **extra_kws,
+#             )
+#             return source
+
+#         if request.param:
+#             h5py = pytest.importorskip("h5py")
+#             fn = tmpdir.join("source.h5")
+#             with h5py.File(str(fn), mode="w") as h5f:
+#                 yield prep_source(h5f)
+#         else:
+#             yield prep_source(group())
+
+#     @pytest.fixture(params=[False, True], ids=["zarr", "hdf5"])
+#     def dest(self, request, tmpdir):
+#         if request.param:
+#             h5py = pytest.importorskip("h5py")
+#             fn = tmpdir.join("dest.h5")
+#             with h5py.File(str(fn), mode="w") as h5f:
+#                 yield h5f
+#         else:
+#             yield group()
+
+#     def test_copy_array(self, source, dest):
+#         # copy array with default options
+#         copy(source["foo/bar/baz"], dest)
+#         check_copied_array(source["foo/bar/baz"], dest["baz"])
+#         copy(source["spam"], dest)
+#         check_copied_array(source["spam"], dest["spam"])
+
+#     def test_copy_bad_dest(self, source, dest):
+#         # try to copy to an array, dest must be a group
+#         dest = dest.create_dataset("eggs", shape=(100,))
+#         with pytest.raises(ValueError):
+#             copy(source["foo/bar/baz"], dest)
+
+#     def test_copy_array_name(self, source, dest):
+#         # copy array with name
+#         copy(source["foo/bar/baz"], dest, name="qux")
+#         assert "baz" not in dest
+#         check_copied_array(source["foo/bar/baz"], dest["qux"])
+
+#     def test_copy_array_create_options(self, source, dest):
+#         dest_h5py = dest.__module__.startswith("h5py.")
+
+#         # copy array, provide creation options
+#         compressor = Zlib(9)
+#         create_kws = dict(chunks=(10,))
+#         if dest_h5py:
+#             create_kws.update(
+#                 compression="gzip", compression_opts=9, shuffle=True, fletcher32=True, fillvalue=42
+#             )
+#         else:
+#             create_kws.update(compressor=compressor, fill_value=42, order="F", filters=[Adler32()])
+#         copy(source["foo/bar/baz"], dest, without_attrs=True, **create_kws)
+#         check_copied_array(
+#             source["foo/bar/baz"], dest["baz"], without_attrs=True, expect_props=create_kws
+#         )
+
+#     def test_copy_array_exists_array(self, source, dest):
+#         # copy array, dest array in the way
+#         dest.create_dataset("baz", shape=(10,))
+
+#         # raise
+#         with pytest.raises(CopyError):
+#             # should raise by default
+#             copy(source["foo/bar/baz"], dest)
+#         assert (10,) == dest["baz"].shape
+#         with pytest.raises(CopyError):
+#             copy(source["foo/bar/baz"], dest, if_exists="raise")
+#         assert (10,) == dest["baz"].shape
+
+#         # skip
+#         copy(source["foo/bar/baz"], dest, if_exists="skip")
+#         assert (10,) == dest["baz"].shape
+
+#         # replace
+#         copy(source["foo/bar/baz"], dest, if_exists="replace")
+#         check_copied_array(source["foo/bar/baz"], dest["baz"])
+
+#         # invalid option
+#         with pytest.raises(ValueError):
+#             copy(source["foo/bar/baz"], dest, if_exists="foobar")
+
+#     def test_copy_array_exists_group(self, source, dest):
+#         # copy array, dest group in the way
+#         dest.create_group("baz")
+
+#         # raise
+#         with pytest.raises(CopyError):
+#             copy(source["foo/bar/baz"], dest)
+#         assert not hasattr(dest["baz"], "shape")
+#         with pytest.raises(CopyError):
+#             copy(source["foo/bar/baz"], dest, if_exists="raise")
+#         assert not hasattr(dest["baz"], "shape")
+
+#         # skip
+#         copy(source["foo/bar/baz"], dest, if_exists="skip")
+#         assert not hasattr(dest["baz"], "shape")
+
+#         # replace
+#         copy(source["foo/bar/baz"], dest, if_exists="replace")
+#         check_copied_array(source["foo/bar/baz"], dest["baz"])
+
+#     def test_copy_array_skip_initialized(self, source, dest):
+#         dest_h5py = dest.__module__.startswith("h5py.")
+
+#         dest.create_dataset("baz", shape=(100,), chunks=(10,), dtype="i8")
+#         assert not np.all(source["foo/bar/baz"][:] == dest["baz"][:])
+
+#         if dest_h5py:
+#             with pytest.raises(ValueError):
+#                 # not available with copy to h5py
+#                 copy(source["foo/bar/baz"], dest, if_exists="skip_initialized")
+
+#         else:
+#             # copy array, dest array exists but not yet initialized
+#             copy(source["foo/bar/baz"], dest, if_exists="skip_initialized")
+#             check_copied_array(source["foo/bar/baz"], dest["baz"])
+
+#             # copy array, dest array exists and initialized, will be skipped
+#             dest["baz"][:] = np.arange(100, 200)
+#             copy(source["foo/bar/baz"], dest, if_exists="skip_initialized")
+#             assert_array_equal(np.arange(100, 200), dest["baz"][:])
+#             assert not np.all(source["foo/bar/baz"][:] == dest["baz"][:])
+
+#     def test_copy_group(self, source, dest):
+#         # copy group, default options
+#         copy(source["foo"], dest)
+#         check_copied_group(source["foo"], dest["foo"])
+
+#     def test_copy_group_no_name(self, source, dest):
+#         with pytest.raises(TypeError):
+#             # need a name if copy root
+#             copy(source, dest)
+
+#         copy(source, dest, name="root")
+#         check_copied_group(source, dest["root"])
+
+#     def test_copy_group_options(self, source, dest):
+#         # copy group, non-default options
+#         copy(source["foo"], dest, name="qux", without_attrs=True)
+#         assert "foo" not in dest
+#         check_copied_group(source["foo"], dest["qux"], without_attrs=True)
+
+#     def test_copy_group_shallow(self, source, dest):
+#         # copy group, shallow
+#         copy(source, dest, name="eggs", shallow=True)
+#         check_copied_group(source, dest["eggs"], shallow=True)
+
+#     def test_copy_group_exists_group(self, source, dest):
+#         # copy group, dest groups exist
+#         dest.create_group("foo/bar")
+#         copy(source["foo"], dest)
+#         check_copied_group(source["foo"], dest["foo"])
+
+#     def test_copy_group_exists_array(self, source, dest):
+#         # copy group, dest array in the way
+#         dest.create_dataset("foo/bar", shape=(10,))
+
+#         # raise
+#         with pytest.raises(CopyError):
+#             copy(source["foo"], dest)
+#         assert dest["foo/bar"].shape == (10,)
+#         with pytest.raises(CopyError):
+#             copy(source["foo"], dest, if_exists="raise")
+#         assert dest["foo/bar"].shape == (10,)
+
+#         # skip
+#         copy(source["foo"], dest, if_exists="skip")
+#         assert dest["foo/bar"].shape == (10,)
+
+#         # replace
+#         copy(source["foo"], dest, if_exists="replace")
+#         check_copied_group(source["foo"], dest["foo"])
+
+#     def test_copy_group_dry_run(self, source, dest):
+#         # dry run, empty destination
+#         n_copied, n_skipped, n_bytes_copied = copy(
+#             source["foo"], dest, dry_run=True, return_stats=True
+#         )
+#         assert 0 == len(dest)
+#         assert 3 == n_copied
+#         assert 0 == n_skipped
+#         assert 0 == n_bytes_copied
+
+#         # dry run, array exists in destination
+#         baz = np.arange(100, 200)
+#         dest.create_dataset("foo/bar/baz", data=baz)
+#         assert not np.all(source["foo/bar/baz"][:] == dest["foo/bar/baz"][:])
+#         assert 1 == len(dest)
+
+#         # raise
+#         with pytest.raises(CopyError):
+#             copy(source["foo"], dest, dry_run=True)
+#         assert 1 == len(dest)
+
+#         # skip
+#         n_copied, n_skipped, n_bytes_copied = copy(
+#             source["foo"], dest, dry_run=True, if_exists="skip", return_stats=True
+#         )
+#         assert 1 == len(dest)
+#         assert 2 == n_copied
+#         assert 1 == n_skipped
+#         assert 0 == n_bytes_copied
+#         assert_array_equal(baz, dest["foo/bar/baz"])
+
+#         # replace
+#         n_copied, n_skipped, n_bytes_copied = copy(
+#             source["foo"], dest, dry_run=True, if_exists="replace", return_stats=True
+#         )
+#         assert 1 == len(dest)
+#         assert 3 == n_copied
+#         assert 0 == n_skipped
+#         assert 0 == n_bytes_copied
+#         assert_array_equal(baz, dest["foo/bar/baz"])
+
+#     def test_logging(self, source, dest, tmpdir):
+#         # callable log
+#         copy(source["foo"], dest, dry_run=True, log=print)
+
+#         # file name
+#         fn = str(tmpdir.join("log_name"))
+#         copy(source["foo"], dest, dry_run=True, log=fn)
+
+#         # file
+#         with tmpdir.join("log_file").open(mode="w") as f:
+#             copy(source["foo"], dest, dry_run=True, log=f)
+
+#         # bad option
+#         with pytest.raises(TypeError):
+#             copy(source["foo"], dest, dry_run=True, log=True)
diff --git a/tests/v3/test_common.py b/tests/v3/test_common.py
index 3bdbd2bffe..cc33aa75cf 100644
--- a/tests/v3/test_common.py
+++ b/tests/v3/test_common.py
@@ -64,7 +64,7 @@ def parse_indexing_order_valid(data: Literal["C", "F"]):
     assert parse_indexing_order(data) == data
 
 
-@pytest.mark.parametrize("data", [10, ("0", 1, 2, 3), {"0": "0"}, []])
+@pytest.mark.parametrize("data", [("0", 1, 2, 3), {"0": "0"}, []])
 def test_parse_shapelike_invalid(data: Any):
     if isinstance(data, Iterable):
         if len(data) == 0: