# Patch summary (unified git diff; the line breaks of the hunks below are collapsed).
#
# Purpose: emit a UserWarning whenever a feature is used that the warning text describes as
# "currently not part in the Zarr version 3 specification", and silence those warnings in
# the test suite.
#
# * pyproject.toml
#     Adds one entry to `filterwarnings` that ignores UserWarnings matching
#     ".*is currently not part in the Zarr version 3 specification.*".
# * src/zarr/api/asynchronous.py -- consolidate_metadata
#     Warns (stacklevel=1) when any entry of `members_metadata` has `zarr_format == 3`.
#     A second hunk adds one line next to `await group._save_metadata()`; from the collapsed
#     text it cannot be determined whether the added line is blank or the await call itself.
# * src/zarr/codecs/vlen_utf8.py -- VLenUTF8Codec, VLenBytesCodec
#     Imports `warn`; each class gains an `__init__` that warns (stacklevel=2) and then calls
#     `super().__init__()`.
# * src/zarr/core/array.py -- _create_v3, _create_v2
#     Imports `warn`; the `dtype` parameter annotation changes from `npt.DTypeLike` to
#     `np.dtype[Any]` in both functions; `_create_v3` warns (stacklevel=2) when
#     `dtype.kind in "UTS"`.
# * src/zarr/core/metadata/v3.py -- validate_codecs
#     Renames the local `codec_id` to `codec_class_name`; the comparisons and error messages
#     shown in the hunk are otherwise unchanged.
#
# NOTE(review): the ignore pattern in pyproject.toml depends on the exact wording
#   "is currently not part in the Zarr version 3 specification" shared by all four messages;
#   rewording any message requires updating the pattern in the same change.
# NOTE(review): consolidate_metadata uses stacklevel=1 while the other three warnings use
#   stacklevel=2 -- confirm this difference is intentional.
# NOTE(review): `_create_v3` now reads `dtype.kind`, so it needs an actual numpy dtype object;
#   confirm every caller converts dtype-like inputs before calling it (callers not visible here).
# NOTE(review): both codec classes are `@dataclass(frozen=True)` and now define `__init__` by
#   hand -- presumably the decorator keeps a user-defined `__init__`; verify.
diff --git a/pyproject.toml b/pyproject.toml index b438a2c292..7b516bbc05 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -363,6 +363,7 @@ filterwarnings = [ "ignore:The loop argument is deprecated since Python 3.8.*:DeprecationWarning", "ignore:Creating a zarr.buffer.gpu.*:UserWarning", "ignore:Duplicate name:UserWarning", # from ZipFile + "ignore:.*is currently not part in the Zarr version 3 specification.*:UserWarning", ] markers = [ "gpu: mark a test as requiring CuPy and GPU" diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py index b5dbb0cfa5..2d1c26e145 100644 --- a/src/zarr/api/asynchronous.py +++ b/src/zarr/api/asynchronous.py @@ -195,6 +195,14 @@ async def consolidate_metadata( v = dataclasses.replace(v, consolidated_metadata=ConsolidatedMetadata(metadata={})) members_metadata[k] = v + if any(m.zarr_format == 3 for m in members_metadata.values()): + warnings.warn( + "Consolidated metadata is currently not part in the Zarr version 3 specification. It " + "may not be supported by other zarr implementations and may change in the future.", + category=UserWarning, + stacklevel=1, + ) + ConsolidatedMetadata._flat_to_nested(members_metadata) consolidated_metadata = ConsolidatedMetadata(metadata=members_metadata) @@ -203,6 +211,7 @@ async def consolidate_metadata( group, metadata=metadata, ) + await group._save_metadata() return group diff --git a/src/zarr/codecs/vlen_utf8.py b/src/zarr/codecs/vlen_utf8.py index 43544e0809..e5b895ae0c 100644 --- a/src/zarr/codecs/vlen_utf8.py +++ b/src/zarr/codecs/vlen_utf8.py @@ -2,6 +2,7 @@ from dataclasses import dataclass from typing import TYPE_CHECKING +from warnings import warn import numpy as np from numcodecs.vlen import VLenBytes, VLenUTF8 @@ -25,6 +26,15 @@ @dataclass(frozen=True) class VLenUTF8Codec(ArrayBytesCodec): + def __init__(self) -> None: + warn( + "The codec `vlen-utf8` is currently not part in the Zarr version 3 specification. 
It " + "may not be supported by other zarr implementations and may change in the future.", + category=UserWarning, + stacklevel=2, + ) + super().__init__() + @classmethod def from_dict(cls, data: dict[str, JSON]) -> Self: _, configuration_parsed = parse_named_configuration( @@ -71,6 +81,15 @@ def compute_encoded_size(self, input_byte_length: int, _chunk_spec: ArraySpec) - @dataclass(frozen=True) class VLenBytesCodec(ArrayBytesCodec): + def __init__(self) -> None: + warn( + "The codec `vlen-bytes` is currently not part in the Zarr version 3 specification. It " + "may not be supported by other zarr implementations and may change in the future.", + category=UserWarning, + stacklevel=2, + ) + super().__init__() + @classmethod def from_dict(cls, data: dict[str, JSON]) -> Self: _, configuration_parsed = parse_named_configuration( diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index aab7e2a527..b57712717b 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -6,6 +6,7 @@ from itertools import starmap from logging import getLogger from typing import TYPE_CHECKING, Any, Generic, Literal, cast, overload +from warnings import warn import numpy as np import numpy.typing as npt @@ -539,7 +540,7 @@ async def _create_v3( store_path: StorePath, *, shape: ShapeLike, - dtype: npt.DTypeLike, + dtype: np.dtype[Any], chunk_shape: ChunkCoords, fill_value: Any | None = None, order: MemoryOrder | None = None, @@ -580,6 +581,14 @@ async def _create_v3( else DefaultChunkKeyEncoding(separator=chunk_key_encoding[1]) ) + if dtype.kind in "UTS": + warn( + f"The dtype `{dtype}` is currently not part in the Zarr version 3 specification. 
It " + "may not be supported by other zarr implementations and may change in the future.", + category=UserWarning, + stacklevel=2, + ) + metadata = ArrayV3Metadata( shape=shape, data_type=dtype, @@ -601,7 +610,7 @@ async def _create_v2( store_path: StorePath, *, shape: ChunkCoords, - dtype: npt.DTypeLike, + dtype: np.dtype[Any], chunks: ChunkCoords, dimension_separator: Literal[".", "/"] | None = None, fill_value: float | None = None, diff --git a/src/zarr/core/metadata/v3.py b/src/zarr/core/metadata/v3.py index b800ae4d73..3e925e08bd 100644 --- a/src/zarr/core/metadata/v3.py +++ b/src/zarr/core/metadata/v3.py @@ -95,14 +95,14 @@ def validate_codecs(codecs: tuple[Codec, ...], dtype: DataType) -> None: # we need to have special codecs if we are decoding vlen strings or bytestrings # TODO: use codec ID instead of class name - codec_id = abc.__class__.__name__ - if dtype == DataType.string and not codec_id == "VLenUTF8Codec": + codec_class_name = abc.__class__.__name__ + if dtype == DataType.string and not codec_class_name == "VLenUTF8Codec": raise ValueError( - f"For string dtype, ArrayBytesCodec must be `VLenUTF8Codec`, got `{codec_id}`." + f"For string dtype, ArrayBytesCodec must be `VLenUTF8Codec`, got `{codec_class_name}`." ) - if dtype == DataType.bytes and not codec_id == "VLenBytesCodec": + if dtype == DataType.bytes and not codec_class_name == "VLenBytesCodec": raise ValueError( - f"For bytes dtype, ArrayBytesCodec must be `VLenBytesCodec`, got `{codec_id}`." + f"For bytes dtype, ArrayBytesCodec must be `VLenBytesCodec`, got `{codec_class_name}`." )