Skip to content

Commit cc4dff9

Browse files
d-v-b, jhamman, normanrz
authored
top-level functions for reading, creating data (#2463)
* add functions for easy read-only data access * sync funcs * make read-only funcs top-level exports * add create_array, create_group, and tests * add top-level imports * add test for top-level exports * add test for read * add asserts * Apply suggestions from code review * handle sharding in create_array * tweak * make logic of _auto_partition better for shard shape * add dtype parsing, and tweak auto_partitioning func * sketch of docstring; remove auto chunks / shard shape * tweak docstring * docstrings * ensure tests pass * tuple -> list * allow data in create_array * docstring * remove auto_partition * make shape shapelike * use create_array everywhere in group class * remove readers * fix dodgy imports * compressors -> compression, auto chunking, auto sharding, auto compression, auto filters * use sane shard shape when there are too few chunks * fix: allow user-specified filters and compression * np.dtype[np.generic] -> np.dtype[Any] * handle singleton compressor / filters input * default codec config now uses the full config dict * test for auto sharding * test * adds a shards property * add (typed) functions for resolving codecs * better codec parsing * add warning if auto sharding is used * remove read_array * rename compression to compressors, and make the docstring for create_array more clear on what filters and compressors mean * compression -> compressors, shard_shape -> shards, chunk_shape -> chunks * use typerror instead of valuerror; docstring * default order is None * fix circular dep * format * fix some tests * use filters=auto and compressors=auto in Group.create_array * compression -> compressors * Update src/zarr/core/group.py Co-authored-by: Norman Rzepka <code@normanrz.com> * fix mypy * narrow type of filters param and compression param * remove data kwarg to create_array * mypy fixes * ensure that we accept dict form of compressor in _parse_chunk_encoding_v2 * fix properties test * add tests for compressors and filters kwargs to create_array 
* add tests for codec inference * add test for illegal shards kwarg for v2 arrays * remove redundant test function * tests and types * rm print * types * resolve cyclic import * add create_array to async and sync API * docs for create_array * rename (Async)Array.create to _create * adds array_bytes_codec kwarg * tests * tests for no filters+compressors * widen type of FiltersParam to include single numcodecs codec instances * don't alias None to default codecs in _create_v2 * allow single codec instances for filters, and None for filters / compressor, and condense some tests * add docstring for None * single-item tuple for compressors in v2 * Update src/zarr/core/array.py * tweaks * pr feedback 1 * tests * mypy * rename array_bytes_codec to serializer * Update src/zarr/api/asynchronous.py Co-authored-by: Joe Hamman <jhamman1@gmail.com> * docstrings * *params -> *like * *params -> *like, in tests * adds deprecated compressor arg to Group.create_array * docs --------- Co-authored-by: Joe Hamman <joe@earthmover.io> Co-authored-by: Norman Rzepka <code@normanrz.com> Co-authored-by: Joe Hamman <jhamman1@gmail.com>
1 parent 7907852 commit cc4dff9

33 files changed

+2399
-799
lines changed

src/zarr/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,8 @@
66
copy_all,
77
copy_store,
88
create,
9+
create_array,
10+
create_group,
911
empty,
1012
empty_like,
1113
full,
@@ -46,6 +48,8 @@
4648
"copy_all",
4749
"copy_store",
4850
"create",
51+
"create_array",
52+
"create_group",
4953
"empty",
5054
"empty_like",
5155
"full",

src/zarr/api/asynchronous.py

Lines changed: 79 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -9,23 +9,23 @@
99
import numpy.typing as npt
1010
from typing_extensions import deprecated
1111

12-
from zarr.core.array import Array, AsyncArray, get_array_metadata
13-
from zarr.core.array_spec import ArrayConfig, ArrayConfigParams
12+
from zarr.core.array import Array, AsyncArray, create_array, get_array_metadata
13+
from zarr.core.array_spec import ArrayConfig, ArrayConfigLike
1414
from zarr.core.buffer import NDArrayLike
1515
from zarr.core.common import (
1616
JSON,
1717
AccessModeLiteral,
1818
ChunkCoords,
1919
MemoryOrder,
2020
ZarrFormat,
21+
_default_zarr_format,
2122
_warn_order_kwarg,
2223
_warn_write_empty_chunks_kwarg,
2324
parse_dtype,
2425
)
25-
from zarr.core.config import config
2626
from zarr.core.group import AsyncGroup, ConsolidatedMetadata, GroupMetadata
2727
from zarr.core.metadata import ArrayMetadataDict, ArrayV2Metadata, ArrayV3Metadata
28-
from zarr.core.metadata.v2 import _default_filters_and_compressor
28+
from zarr.core.metadata.v2 import _default_compressor, _default_filters
2929
from zarr.errors import NodeTypeValidationError
3030
from zarr.storage import (
3131
StoreLike,
@@ -49,6 +49,7 @@
4949
"copy_all",
5050
"copy_store",
5151
"create",
52+
"create_array",
5253
"empty",
5354
"empty_like",
5455
"full",
@@ -150,11 +151,6 @@ def _handle_zarr_version_or_format(
150151
return zarr_format
151152

152153

153-
def _default_zarr_version() -> ZarrFormat:
154-
"""Return the default zarr_version"""
155-
return cast(ZarrFormat, int(config.get("default_zarr_version", 3)))
156-
157-
158154
async def consolidate_metadata(
159155
store: StoreLike,
160156
path: str | None = None,
@@ -300,8 +296,8 @@ async def open(
300296
path : str or None, optional
301297
The path within the store to open.
302298
storage_options : dict
303-
If using an fsspec URL to create the store, these will be passed to
304-
the backend implementation. Ignored otherwise.
299+
If the store is backed by an fsspec-based implementation, then this dict will be passed to
300+
the Store constructor for that implementation. Ignored otherwise.
305301
**kwargs
306302
Additional parameters are passed through to :func:`zarr.creation.open_array` or
307303
:func:`zarr.hierarchy.open_group`.
@@ -417,7 +413,7 @@ async def save_array(
417413
"""
418414
zarr_format = (
419415
_handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format)
420-
or _default_zarr_version()
416+
or _default_zarr_format()
421417
)
422418
if not isinstance(arr, NDArrayLike):
423419
raise TypeError("arr argument must be numpy or other NDArrayLike array")
@@ -429,7 +425,7 @@ async def save_array(
429425
shape = arr.shape
430426
chunks = getattr(arr, "chunks", None) # for array-likes with chunks attribute
431427
overwrite = kwargs.pop("overwrite", None) or _infer_overwrite(mode)
432-
new = await AsyncArray.create(
428+
new = await AsyncArray._create(
433429
store_path,
434430
zarr_format=zarr_format,
435431
shape=shape,
@@ -477,7 +473,7 @@ async def save_group(
477473
zarr_version=zarr_version,
478474
zarr_format=zarr_format,
479475
)
480-
or _default_zarr_version()
476+
or _default_zarr_format()
481477
)
482478

483479
for arg in args:
@@ -657,7 +653,7 @@ async def group(
657653
try:
658654
return await AsyncGroup.open(store=store_path, zarr_format=zarr_format)
659655
except (KeyError, FileNotFoundError):
660-
_zarr_format = zarr_format or _default_zarr_version()
656+
_zarr_format = zarr_format or _default_zarr_format()
661657
return await AsyncGroup.from_store(
662658
store=store_path,
663659
zarr_format=_zarr_format,
@@ -666,6 +662,56 @@ async def group(
666662
)
667663

668664

665+
async def create_group(
666+
*,
667+
store: StoreLike,
668+
path: str | None = None,
669+
overwrite: bool = False,
670+
zarr_format: ZarrFormat | None = None,
671+
attributes: dict[str, Any] | None = None,
672+
storage_options: dict[str, Any] | None = None,
673+
) -> AsyncGroup:
674+
"""Create a group.
675+
676+
Parameters
677+
----------
678+
store : Store or str
679+
Store or path to directory in file system.
680+
path : str, optional
681+
Group path within store.
682+
overwrite : bool, optional
683+
If True, pre-existing data at ``path`` will be deleted before
684+
creating the group.
685+
zarr_format : {2, 3, None}, optional
686+
The zarr format to use when saving.
687+
If no ``zarr_format`` is provided, the default format will be used.
688+
This default can be changed by modifying the value of ``default_zarr_format``
689+
in :mod:`zarr.core.config`.
690+
storage_options : dict
691+
If using an fsspec URL to create the store, these will be passed to
692+
the backend implementation. Ignored otherwise.
693+
694+
Returns
695+
-------
696+
AsyncGroup
697+
The new group.
698+
"""
699+
700+
if zarr_format is None:
701+
zarr_format = _default_zarr_format()
702+
703+
mode: Literal["a"] = "a"
704+
705+
store_path = await make_store_path(store, path=path, mode=mode, storage_options=storage_options)
706+
707+
return await AsyncGroup.from_store(
708+
store=store_path,
709+
zarr_format=zarr_format,
710+
overwrite=overwrite,
711+
attributes=attributes,
712+
)
713+
714+
669715
async def open_group(
670716
store: StoreLike | None = None,
671717
*, # Note: this is a change from v2
@@ -768,7 +814,7 @@ async def open_group(
768814
pass
769815
if mode in _CREATE_MODES:
770816
overwrite = _infer_overwrite(mode)
771-
_zarr_format = zarr_format or _default_zarr_version()
817+
_zarr_format = zarr_format or _default_zarr_format()
772818
return await AsyncGroup.from_store(
773819
store_path,
774820
zarr_format=_zarr_format,
@@ -813,7 +859,7 @@ async def create(
813859
codecs: Iterable[Codec | dict[str, JSON]] | None = None,
814860
dimension_names: Iterable[str] | None = None,
815861
storage_options: dict[str, Any] | None = None,
816-
config: ArrayConfig | ArrayConfigParams | None = None,
862+
config: ArrayConfig | ArrayConfigLike | None = None,
817863
**kwargs: Any,
818864
) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]:
819865
"""Create an array.
@@ -843,8 +889,8 @@ async def create(
843889
If no codecs are provided, default codecs will be used:
844890
845891
- For numeric arrays, the default is ``BytesCodec`` and ``ZstdCodec``.
846-
- For Unicode strings, the default is ``VLenUTF8Codec``.
847-
- For bytes or objects, the default is ``VLenBytesCodec``.
892+
- For Unicode strings, the default is ``VLenUTF8Codec`` and ``ZstdCodec``.
893+
- For bytes or objects, the default is ``VLenBytesCodec`` and ``ZstdCodec``.
848894
849895
These defaults can be changed by modifying the value of ``array.v3_default_codecs`` in :mod:`zarr.core.config`.
850896
compressor : Codec, optional
@@ -857,7 +903,8 @@ async def create(
857903
- For Unicode strings, the default is ``VLenUTF8Codec``.
858904
- For bytes or objects, the default is ``VLenBytesCodec``.
859905
860-
These defaults can be changed by modifying the value of ``array.v2_default_compressor`` in :mod:`zarr.core.config`. fill_value : object
906+
These defaults can be changed by modifying the value of ``array.v2_default_compressor`` in :mod:`zarr.core.config`.
907+
fill_value : object
861908
Default value to use for uninitialized portions of the array.
862909
order : {'C', 'F'}, optional
863910
Deprecated in favor of the ``config`` keyword argument.
@@ -878,8 +925,8 @@ async def create(
878925
for storage of both chunks and metadata.
879926
filters : sequence of Codecs, optional
880927
Sequence of filters to use to encode chunk data prior to compression.
881-
V2 only. If neither ``compressor`` nor ``filters`` are provided, a default
882-
compressor will be used. (see ``compressor`` for details).
928+
V2 only. If no ``filters`` are provided, a default set of filters will be used.
929+
These defaults can be changed by modifying the value of ``array.v2_default_filters`` in :mod:`zarr.core.config`.
883930
cache_metadata : bool, optional
884931
If True, array configuration metadata will be cached for the
885932
lifetime of the object. If False, array metadata will be reloaded
@@ -914,7 +961,7 @@ async def create(
914961
storage_options : dict
915962
If using an fsspec URL to create the store, these will be passed to
916963
the backend implementation. Ignored otherwise.
917-
config : ArrayConfig or ArrayConfigParams, optional
964+
config : ArrayConfig or ArrayConfigLike, optional
918965
Runtime configuration of the array. If provided, will override the
919966
default values from `zarr.config.array`.
920967
@@ -925,15 +972,17 @@ async def create(
925972
"""
926973
zarr_format = (
927974
_handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format)
928-
or _default_zarr_version()
975+
or _default_zarr_format()
929976
)
930977

931978
if zarr_format == 2:
932979
if chunks is None:
933980
chunks = shape
934981
dtype = parse_dtype(dtype, zarr_format)
935-
if not filters and not compressor:
936-
filters, compressor = _default_filters_and_compressor(dtype)
982+
if not filters:
983+
filters = _default_filters(dtype)
984+
if not compressor:
985+
compressor = _default_compressor(dtype)
937986
elif zarr_format == 3 and chunk_shape is None: # type: ignore[redundant-expr]
938987
if chunks is not None:
939988
chunk_shape = chunks
@@ -971,7 +1020,7 @@ async def create(
9711020
mode = "a"
9721021
store_path = await make_store_path(store, path=path, mode=mode, storage_options=storage_options)
9731022

974-
config_dict: ArrayConfigParams = {}
1023+
config_dict: ArrayConfigLike = {}
9751024

9761025
if write_empty_chunks is not None:
9771026
if config is not None:
@@ -994,7 +1043,7 @@ async def create(
9941043

9951044
config_parsed = ArrayConfig.from_dict(config_dict)
9961045

997-
return await AsyncArray.create(
1046+
return await AsyncArray._create(
9981047
store_path,
9991048
shape=shape,
10001049
chunks=chunks,
@@ -1173,7 +1222,7 @@ async def open_array(
11731222
If using an fsspec URL to create the store, these will be passed to
11741223
the backend implementation. Ignored otherwise.
11751224
**kwargs
1176-
Any keyword arguments to pass to ``create``.
1225+
Any keyword arguments to pass to :func:`create`.
11771226
11781227
Returns
11791228
-------
@@ -1196,7 +1245,7 @@ async def open_array(
11961245
except FileNotFoundError:
11971246
if not store_path.read_only and mode in _CREATE_MODES:
11981247
overwrite = _infer_overwrite(mode)
1199-
_zarr_format = zarr_format or _default_zarr_version()
1248+
_zarr_format = zarr_format or _default_zarr_format()
12001249
return await create(
12011250
store=store_path,
12021251
zarr_format=_zarr_format,

0 commit comments

Comments
 (0)