Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/v3' into fix/intermediates
Browse files Browse the repository at this point in the history
  • Loading branch information
TomAugspurger committed Sep 27, 2024
2 parents 44e4554 + 5ca080d commit 10fdc90
Show file tree
Hide file tree
Showing 4 changed files with 63 additions and 2 deletions.
9 changes: 8 additions & 1 deletion src/zarr/core/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,14 @@ def parse_array_metadata(data: Any) -> ArrayV2Metadata | ArrayV3Metadata:
return data
elif isinstance(data, dict):
if data["zarr_format"] == 3:
return ArrayV3Metadata.from_dict(data)
meta_out = ArrayV3Metadata.from_dict(data)
if len(meta_out.storage_transformers) > 0:
msg = (
f"Array metadata contains storage transformers: {meta_out.storage_transformers}."
"Arrays with storage transformers are not supported in zarr-python at this time."
)
raise ValueError(msg)
return meta_out
elif data["zarr_format"] == 2:
return ArrayV2Metadata.from_dict(data)
raise TypeError
Expand Down
21 changes: 21 additions & 0 deletions src/zarr/core/metadata/v3.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,23 @@ def parse_dimension_names(data: object) -> tuple[str | None, ...] | None:
raise TypeError(msg)


def parse_storage_transformers(data: object) -> tuple[dict[str, JSON], ...]:
    """
    Parse storage_transformers. Zarr python cannot use storage transformers
    at this time, so this function doesn't attempt to validate them.

    Parameters
    ----------
    data : object
        ``None`` or an iterable of dicts describing storage transformers.

    Returns
    -------
    tuple[dict[str, JSON], ...]
        An empty tuple when ``data`` is ``None`` or empty; otherwise the
        transformers materialized as a tuple.

    Raises
    ------
    TypeError
        If ``data`` is neither ``None`` nor an iterable.
    """
    if data is None:
        return ()
    if isinstance(data, Iterable):
        # Materialize exactly once. The previous implementation called
        # tuple(data) only to take its length and then returned ``data``
        # unchanged, which both violated the declared return type and
        # left one-shot iterators exhausted before the caller saw them.
        return tuple(data)
    raise TypeError(
        f"Invalid storage_transformers. Expected an iterable of dicts. Got {type(data)} instead."
    )


class V3JsonEncoder(json.JSONEncoder):
def __init__(self, *args: Any, **kwargs: Any) -> None:
self.indent = kwargs.pop("indent", config.get("json_indent"))
Expand Down Expand Up @@ -144,6 +161,7 @@ class ArrayV3Metadata(ArrayMetadata):
dimension_names: tuple[str, ...] | None = None
zarr_format: Literal[3] = field(default=3, init=False)
node_type: Literal["array"] = field(default="array", init=False)
storage_transformers: tuple[dict[str, JSON], ...]

def __init__(
self,
Expand All @@ -156,6 +174,7 @@ def __init__(
codecs: Iterable[Codec | dict[str, JSON]],
attributes: None | dict[str, JSON],
dimension_names: None | Iterable[str],
storage_transformers: None | Iterable[dict[str, JSON]] = None,
) -> None:
"""
Because the class is a frozen dataclass, we set attributes using object.__setattr__
Expand All @@ -168,6 +187,7 @@ def __init__(
fill_value_parsed = parse_fill_value(fill_value, dtype=data_type_parsed)
attributes_parsed = parse_attributes(attributes)
codecs_parsed_partial = parse_codecs(codecs)
storage_transformers_parsed = parse_storage_transformers(storage_transformers)

array_spec = ArraySpec(
shape=shape_parsed,
Expand All @@ -186,6 +206,7 @@ def __init__(
object.__setattr__(self, "dimension_names", dimension_names_parsed)
object.__setattr__(self, "fill_value", fill_value_parsed)
object.__setattr__(self, "attributes", attributes_parsed)
object.__setattr__(self, "storage_transformers", storage_transformers_parsed)

self._validate_metadata()

Expand Down
24 changes: 23 additions & 1 deletion tests/v3/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,10 @@

import zarr.api.asynchronous
from zarr import Array, AsyncArray, Group
from zarr.codecs.bytes import BytesCodec
from zarr.core.array import chunks_initialized
from zarr.core.buffer.cpu import NDBuffer
from zarr.core.common import ZarrFormat
from zarr.core.common import JSON, ZarrFormat
from zarr.core.group import AsyncGroup
from zarr.core.indexing import ceildiv
from zarr.core.sync import sync
Expand Down Expand Up @@ -275,6 +276,27 @@ def test_serializable_sync_array(store: LocalStore, zarr_format: ZarrFormat) ->
np.testing.assert_array_equal(actual[:], expected[:])


@pytest.mark.parametrize("store", ["memory"], indirect=True)
def test_storage_transformers(store: MemoryStore) -> None:
    """
    Test that array metadata declaring a storage transformer is rejected
    with a ValueError, since zarr-python does not support storage
    transformers at this time.
    """
    metadata_dict: dict[str, JSON] = {
        "zarr_format": 3,
        "node_type": "array",
        "shape": (10,),
        "chunk_grid": {"name": "regular", "configuration": {"chunk_shape": (1,)}},
        "data_type": "uint8",
        "chunk_key_encoding": {"name": "v2", "configuration": {"separator": "/"}},
        "codecs": (BytesCodec().to_dict(),),
        "fill_value": 0,
        # The trailing comma is required: without it the parentheses are
        # just grouping and this value is a bare dict, not the intended
        # 1-tuple of transformer dicts.
        "storage_transformers": ({"test": "should_raise"},),
    }
    match = "Arrays with storage transformers are not supported in zarr-python at this time."
    with pytest.raises(ValueError, match=match):
        Array.from_dict(StorePath(store), data=metadata_dict)


@pytest.mark.parametrize("test_cls", [Array, AsyncArray])
@pytest.mark.parametrize("nchunks", [2, 5, 10])
def test_nchunks(test_cls: type[Array] | type[AsyncArray], nchunks: int) -> None:
Expand Down
11 changes: 11 additions & 0 deletions tests/v3/test_metadata/test_v3.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from typing import Any

from zarr.abc.codec import Codec
from zarr.core.common import JSON


import numpy as np
Expand Down Expand Up @@ -196,6 +197,7 @@ def test_parse_fill_value_invalid_type_sequence(fill_value: Any, dtype_str: str)
@pytest.mark.parametrize("chunk_key_encoding", ["v2", "default"])
@pytest.mark.parametrize("dimension_separator", [".", "/", None])
@pytest.mark.parametrize("dimension_names", ["nones", "strings", "missing"])
@pytest.mark.parametrize("storage_transformers", [None, ()])
def test_metadata_to_dict(
chunk_grid: str,
codecs: list[Codec],
Expand All @@ -204,6 +206,7 @@ def test_metadata_to_dict(
dimension_separator: Literal[".", "/"] | None,
dimension_names: Literal["nones", "strings", "missing"],
attributes: None | dict[str, Any],
storage_transformers: None | tuple[dict[str, JSON]],
) -> None:
shape = (1, 2, 3)
data_type = "uint8"
Expand Down Expand Up @@ -234,6 +237,7 @@ def test_metadata_to_dict(
"chunk_key_encoding": cke,
"codecs": tuple(c.to_dict() for c in codecs),
"fill_value": fill_value,
"storage_transformers": storage_transformers,
}

if attributes is not None:
Expand All @@ -244,9 +248,16 @@ def test_metadata_to_dict(
metadata = ArrayV3Metadata.from_dict(metadata_dict)
observed = metadata.to_dict()
expected = metadata_dict.copy()

# if unset or None or (), storage_transformers gets normalized to ()
assert observed["storage_transformers"] == ()
observed.pop("storage_transformers")
expected.pop("storage_transformers")

if attributes is None:
assert observed["attributes"] == {}
observed.pop("attributes")

if dimension_separator is None:
if chunk_key_encoding == "default":
expected_cke_dict = DefaultChunkKeyEncoding(separator="/").to_dict()
Expand Down

0 comments on commit 10fdc90

Please sign in to comment.