zarr-developers · d-v-b · May 15, 2024 · Mar 26, 2024 · Mar 27, 2024 · Mar 27, 2024
diff --git a/src/zarr/abc/store.py b/src/zarr/abc/store.py
@@ -1,6 +1,6 @@
 from abc import abstractmethod, ABC
-
 from collections.abc import AsyncGenerator
+
 from typing import List, Tuple, Optional
 
 

diff --git a/src/zarr/array.py b/src/zarr/array.py
@@ -28,6 +28,7 @@
     ChunkCoords,
     Selection,
     SliceSelection,
+    ZarrFormat,
     concurrent_map,
 )
 from zarr.config import config
@@ -89,6 +90,7 @@ async def create(
         dimension_names: Optional[Iterable[str]] = None,
         attributes: Optional[Dict[str, Any]] = None,
         exists_ok: bool = False,
+        zarr_format: ZarrFormat = 3,
     ) -> AsyncArray:
         store_path = make_store_path(store)
         if not exists_ok:

diff --git a/src/zarr/common.py b/src/zarr/common.py
@@ -5,8 +5,6 @@
     Union,
     Tuple,
     Iterable,
-    Dict,
-    List,
     TypeVar,
     overload,
     Any,
@@ -18,7 +16,7 @@
 import functools
 
 if TYPE_CHECKING:
-    from typing import Any, Awaitable, Callable, Iterator, Optional, Type
+    from typing import Awaitable, Callable, Iterator, Optional, Type
 
 import numpy as np
 
@@ -27,25 +25,26 @@
 ZGROUP_JSON = ".zgroup"
 ZATTRS_JSON = ".zattrs"
 
-BytesLike = Union[bytes, bytearray, memoryview]
-ChunkCoords = Tuple[int, ...]
+BytesLike = bytes | bytearray | memoryview
+ChunkCoords = tuple[int, ...]
 ChunkCoordsLike = Iterable[int]
-SliceSelection = Tuple[slice, ...]
-Selection = Union[slice, SliceSelection]
-JSON = Union[str, None, int, float, Enum, Dict[str, "JSON"], List["JSON"], Tuple["JSON", ...]]
+SliceSelection = tuple[slice, ...]
+Selection = slice | SliceSelection
+ZarrFormat = Literal[2, 3]
+JSON = Union[str, None, int, float, Enum, dict[str, "JSON"], list["JSON"], tuple["JSON", ...]]
 
 
 def product(tup: ChunkCoords) -> int:
     return functools.reduce(lambda x, y: x * y, tup, 1)
 
 
-T = TypeVar("T", bound=Tuple[Any, ...])
+T = TypeVar("T", bound=tuple[Any, ...])
 V = TypeVar("V")
 
 
 async def concurrent_map(
-    items: List[T], func: Callable[..., Awaitable[V]], limit: Optional[int] = None
-) -> List[V]:
+    items: list[T], func: Callable[..., Awaitable[V]], limit: Optional[int] = None
+) -> list[V]:
     if limit is None:
         return await asyncio.gather(*[func(*item) for item in items])
 
@@ -127,18 +126,18 @@ def parse_configuration(data: JSON) -> JSON:
 @overload
 def parse_named_configuration(
     data: JSON, expected_name: Optional[str] = None
-) -> Tuple[str, Dict[str, JSON]]: ...
+) -> tuple[str, dict[str, JSON]]: ...
 
 
 @overload
 def parse_named_configuration(
     data: JSON, expected_name: Optional[str] = None, *, require_configuration: bool = True
-) -> Tuple[str, Optional[Dict[str, JSON]]]: ...
+) -> tuple[str, Optional[dict[str, JSON]]]: ...
 
 
 def parse_named_configuration(
     data: JSON, expected_name: Optional[str] = None, *, require_configuration: bool = True
-) -> Tuple[str, Optional[JSON]]:
+) -> tuple[str, Optional[JSON]]:
     if not isinstance(data, dict):
         raise TypeError(f"Expected dict, got {type(data)}")
     if "name" not in data:
@@ -153,7 +152,7 @@ def parse_named_configuration(
     return name_parsed, configuration_parsed
 
 
-def parse_shapelike(data: Any) -> Tuple[int, ...]:
+def parse_shapelike(data: Any) -> tuple[int, ...]:
     if not isinstance(data, Iterable):
         raise TypeError(f"Expected an iterable. Got {data} instead.")
     data_tuple = tuple(data)

diff --git a/src/zarr/group.py b/src/zarr/group.py
@@ -5,21 +5,19 @@
 import asyncio
 import json
 import logging
+import numpy.typing as npt
 
 if TYPE_CHECKING:
-    from typing import (
-        Any,
-        AsyncGenerator,
-        Literal,
-        AsyncIterator,
-    )
+    from typing import Any, AsyncGenerator, Literal, Iterable
+from zarr.abc.codec import Codec
 from zarr.abc.metadata import Metadata
 
 from zarr.array import AsyncArray, Array
 from zarr.attributes import Attributes
-from zarr.common import ZARR_JSON, ZARRAY_JSON, ZATTRS_JSON, ZGROUP_JSON
+from zarr.common import ZARR_JSON, ZARRAY_JSON, ZATTRS_JSON, ZGROUP_JSON, ChunkCoords
 from zarr.store import StoreLike, StorePath, make_store_path
 from zarr.sync import SyncMixin, sync
+from typing import overload
 
 logger = logging.getLogger("zarr.group")
 
@@ -41,6 +39,26 @@ def parse_attributes(data: Any) -> dict[str, Any]:
     raise TypeError(msg)
 
 
+@overload
+def _parse_async_node(node: AsyncArray) -> Array: ...
+
+
+@overload
+def _parse_async_node(node: AsyncGroup) -> Group: ...
+
+
+def _parse_async_node(node: AsyncArray | AsyncGroup) -> Array | Group:
+    """
+    Wrap an AsyncArray in an Array, or an AsyncGroup in a Group; raise TypeError otherwise.
+    """
+    if isinstance(node, AsyncArray):
+        return Array(node)
+    elif isinstance(node, AsyncGroup):
+        return Group(node)
+    else:
+        raise TypeError(f"Expected an AsyncArray or AsyncGroup, got {type(node)}")
+
+
 @dataclass(frozen=True)
 class GroupMetadata(Metadata):
     attributes: dict[str, Any] = field(default_factory=dict)
@@ -53,7 +71,7 @@ def to_bytes(self) -> dict[str, bytes]:
             return {ZARR_JSON: json.dumps(self.to_dict()).encode()}
         else:
             return {
-                ZGROUP_JSON: json.dumps({"zarr_format": 2}).encode(),
+                ZGROUP_JSON: json.dumps({"zarr_format": self.zarr_format}).encode(),
                 ZATTRS_JSON: json.dumps(self.attributes).encode(),
             }
 
@@ -113,11 +131,11 @@ async def open(
                 (store_path / ZGROUP_JSON).get(), (store_path / ZATTRS_JSON).get()
             )
             if zgroup_bytes is None:
-                raise KeyError(store_path)  # filenotfounderror?
+                raise FileNotFoundError(store_path)
         elif zarr_format == 3:
             zarr_json_bytes = await (store_path / ZARR_JSON).get()
             if zarr_json_bytes is None:
-                raise KeyError(store_path)  # filenotfounderror?
+                raise FileNotFoundError(store_path)
         elif zarr_format is None:
             zarr_json_bytes, zgroup_bytes, zattrs_bytes = await asyncio.gather(
                 (store_path / ZARR_JSON).get(),
@@ -168,17 +186,14 @@ async def getitem(
         key: str,
     ) -> AsyncArray | AsyncGroup:
         store_path = self.store_path / key
+        logger.debug("key=%s, store_path=%s", key, store_path)
 
         # Note:
         # in zarr-python v2, we first check if `key` references an Array, else if `key` references
         # a group,using standalone `contains_array` and `contains_group` functions. These functions
         # are reusable, but for v3 they would perform redundant I/O operations.
         # Not clear how much of that strategy we want to keep here.
 
-        # if `key` names an object in storage, it cannot be an array or group
-        if await store_path.exists():
-            raise KeyError(key)
-
         if self.metadata.zarr_format == 3:
             zarr_json_bytes = await (store_path / ZARR_JSON).get()
             if zarr_json_bytes is None:
@@ -248,16 +263,42 @@ def attrs(self):
     def info(self):
         return self.metadata.info
 
-    async def create_group(self, path: str, **kwargs) -> AsyncGroup:
+    async def create_group(
+        self, path: str, exists_ok: bool = False, attributes: dict[str, Any] | None = None
+    ) -> AsyncGroup:
         return await type(self).create(
             self.store_path / path,
-            **kwargs,
+            attributes={} if attributes is None else attributes,
+            exists_ok=exists_ok,
+            zarr_format=self.metadata.zarr_format,
         )
 
-    async def create_array(self, path: str, **kwargs) -> AsyncArray:
+    async def create_array(
+        self,
+        path: str,
+        shape: ChunkCoords,
+        dtype: npt.DTypeLike,
+        chunk_shape: ChunkCoords,
+        fill_value: Any | None = None,
+        chunk_key_encoding: tuple[Literal["default"], Literal[".", "/"]]
+        | tuple[Literal["v2"], Literal[".", "/"]] = ("default", "/"),
+        codecs: Iterable[Codec | dict[str, Any]] | None = None,
+        dimension_names: Iterable[str] | None = None,
+        attributes: dict[str, Any] | None = None,
+        exists_ok: bool = False,
+    ) -> AsyncArray:
         return await AsyncArray.create(
             self.store_path / path,
-            **kwargs,
+            shape=shape,
+            dtype=dtype,
+            chunk_shape=chunk_shape,
+            fill_value=fill_value,
+            chunk_key_encoding=chunk_key_encoding,
+            codecs=codecs,
+            dimension_names=dimension_names,
+            attributes=attributes,
+            exists_ok=exists_ok,
+            zarr_format=self.metadata.zarr_format,
         )
 
     async def update_attributes(self, new_attributes: dict[str, Any]):
@@ -348,7 +389,7 @@ async def array_keys(self) -> AsyncGenerator[str, None]:
                 yield key
 
     # todo: decide if this method should be separate from `array_keys`
-    async def arrays(self) -> AsyncIterator[AsyncArray]:
+    async def arrays(self) -> AsyncGenerator[AsyncArray, None]:
         async for key, value in self.members():
             if isinstance(value, AsyncArray):
                 yield value
@@ -472,19 +513,13 @@ def nmembers(self) -> int:
     @property
     def members(self) -> tuple[tuple[str, Array | Group], ...]:
         """
-        Return the sub-arrays and sub-groups of this group as a `tuple` of (name, array | group)
+        Return the sub-arrays and sub-groups of this group as a tuple of (name, array | group)
         pairs
         """
-        _members: list[tuple[str, AsyncArray | AsyncGroup]] = self._sync_iter(
-            self._async_group.members()
-        )
-        ret: list[tuple[str, Array | Group]] = []
-        for key, value in _members:
-            if isinstance(value, AsyncArray):
-                ret.append((key, Array(value)))
-            else:
-                ret.append((key, Group(value)))
-        return tuple(ret)
+        _members = self._sync_iter(self._async_group.members())
+
+        wrapped = ((name, _parse_async_node(node)) for name, node in _members)
+        return tuple(wrapped)
 
     def __contains__(self, member) -> bool:
         return self._sync(self._async_group.contains(member))

diff --git a/src/zarr/store/local.py b/src/zarr/store/local.py
@@ -4,20 +4,20 @@
 import shutil
 from collections.abc import AsyncGenerator
 from pathlib import Path
-from typing import Union, Optional, List, Tuple
 
 from zarr.abc.store import Store
 from zarr.common import BytesLike, concurrent_map, to_thread
 
 
-def _get(path: Path, byte_range: Optional[Tuple[int, Optional[int]]] = None) -> bytes:
+def _get(path: Path, byte_range: tuple[int, int | None] | None = None) -> bytes:
     """
     Fetch a contiguous region of bytes from a file.
+
     Parameters
     ----------
     path: Path
         The file to read bytes from.
-    byte_range: Optional[Tuple[int, Optional[int]]] = None
+    byte_range: tuple[int, int | None] | None = None
         The range of bytes to read. If `byte_range` is `None`, then the entire file will be read.
         If `byte_range` is a tuple, the first value specifies the index of the first byte to read,
         and the second value specifies the total number of bytes to read. If the total value is
@@ -49,7 +49,7 @@ def _get(path: Path, byte_range: Optional[Tuple[int, Optional[int]]] = None) ->
 def _put(
     path: Path,
     value: BytesLike,
-    start: Optional[int] = None,
+    start: int | None = None,
     auto_mkdir: bool = True,
 ) -> int | None:
     if auto_mkdir:
@@ -71,7 +71,7 @@ class LocalStore(Store):
     root: Path
     auto_mkdir: bool
 
-    def __init__(self, root: Union[Path, str], auto_mkdir: bool = True):
+    def __init__(self, root: Path | str, auto_mkdir: bool = True):
         if isinstance(root, str):
             root = Path(root)
         assert isinstance(root, Path)
@@ -88,9 +88,7 @@ def __repr__(self) -> str:
     def __eq__(self, other: object) -> bool:
         return isinstance(other, type(self)) and self.root == other.root
 
-    async def get(
-        self, key: str, byte_range: Optional[Tuple[int, Optional[int]]] = None
-    ) -> Optional[bytes]:
+    async def get(self, key: str, byte_range: tuple[int, int | None] | None = None) -> bytes | None:
         assert isinstance(key, str)
         path = self.root / key
 
@@ -100,8 +98,8 @@ async def get(
             return None
 
     async def get_partial_values(
-        self, key_ranges: List[Tuple[str, Tuple[int, int]]]
-    ) -> List[Optional[bytes]]:
+        self, key_ranges: list[tuple[str, tuple[int, int]]]
+    ) -> list[bytes | None]:
         """
         Read byte ranges from multiple keys.
         Parameters
@@ -124,7 +122,7 @@ async def set(self, key: str, value: BytesLike) -> None:
         path = self.root / key
         await to_thread(_put, path, value, auto_mkdir=self.auto_mkdir)
 
-    async def set_partial_values(self, key_start_values: List[Tuple[str, int, bytes]]) -> None:
+    async def set_partial_values(self, key_start_values: list[tuple[str, int, bytes]]) -> None:
         args = []
         for key, start, value in key_start_values:
             assert isinstance(key, str)
@@ -169,6 +167,9 @@ async def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]:
         -------
         AsyncGenerator[str, None]
         """
+        # Files are walked once, by the loop below. `rglob` returns paths that still include
+        # `self.root`, so yielding `str(p)` directly would leak the store location into keys.
+        # NOTE(review): presumably the loop below strips `to_strip` before yielding; confirm.
 
         to_strip = str(self.root) + "/"
         for p in (self.root / prefix).rglob("*"):

diff --git a/src/zarr/store/memory.py b/src/zarr/store/memory.py
@@ -88,4 +88,4 @@ async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]:
         else:
             for key in self._store_dict:
                 if key.startswith(prefix + "/") and key != prefix:
-                    yield key.strip(prefix + "/").split("/")[0]
+                    yield key.removeprefix(prefix + "/").split("/")[0]
diff --git a/tests/v2/conftest.py b/tests/v2/conftest.py
@@ -1,5 +1,5 @@
-import pathlib
 import pytest
+import pathlib
 
 
 @pytest.fixture(params=[str, pathlib.Path])

diff --git a/tests/v3/__init__.py b/tests/v3/__init__.py