From 97246cb9ebc382ca12e4d17886f01deb1d725523 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 30 Sep 2024 17:33:08 +0100 Subject: [PATCH 1/3] Bump the actions group across 1 directory with 2 updates (#2257) Updates the requirements on [sphinx](https://github.com/sphinx-doc/sphinx) and [sphinx-autoapi](https://github.com/readthedocs/sphinx-autoapi) to permit the latest version. Updates `sphinx` to 8.0.2 - [Release notes](https://github.com/sphinx-doc/sphinx/releases) - [Changelog](https://github.com/sphinx-doc/sphinx/blob/v8.0.2/CHANGES.rst) - [Commits](https://github.com/sphinx-doc/sphinx/compare/v0.1.61611...v8.0.2) Updates `sphinx-autoapi` to 3.3.2 - [Release notes](https://github.com/readthedocs/sphinx-autoapi/releases) - [Changelog](https://github.com/readthedocs/sphinx-autoapi/blob/main/CHANGELOG.rst) - [Commits](https://github.com/readthedocs/sphinx-autoapi/compare/v0.2.0...v3.3.2) --- updated-dependencies: - dependency-name: sphinx dependency-type: direct:production dependency-group: actions - dependency-name: sphinx-autoapi dependency-type: direct:production dependency-group: actions ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 80f709c94..1759d3919 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -78,9 +78,9 @@ gpu = [ "cupy-cuda12x", ] docs = [ - 'sphinx==7.4.7', + 'sphinx==8.0.2', 'sphinx-autobuild>=2021.3.14', - 'sphinx-autoapi==3.3.1', + 'sphinx-autoapi==3.3.2', 'sphinx_design', 'sphinx-issues', 'sphinx-copybutton', From 2761845c5e0e5ac05ee625494b4aac96afc3c4d4 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 30 Sep 2024 14:45:28 -0700 Subject: [PATCH 2/3] chore: update pre-commit hooks (#2277) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/astral-sh/ruff-pre-commit: v0.6.7 → v0.6.8](https://github.com/astral-sh/ruff-pre-commit/compare/v0.6.7...v0.6.8) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 99a69dc54..e7868d29d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -7,7 +7,7 @@ default_language_version: python: python3 repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.6.7 + rev: v0.6.8 hooks: - id: ruff args: ["--fix", "--show-fixes"] From f3a2e0ad3b5b76e1015629f61e8548d0cab84190 Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Tue, 1 Oct 2024 08:00:54 -0700 Subject: [PATCH 3/3] [v3] fix: zarr v2 compatibility fixes for Dask (#2186) * fix: zarr v2 compatibility fixes - port normalize_chunks from v2 - add array.store property - default to append in create * move zarr.store to zarr.storage also fix failing ci * make chunks a tuple * Apply suggestions from code review * more merge 
conflict resolution * fixups * fixup zipstore * Apply suggestions from code review * Apply suggestions from code review * add test * extend test * clean up parents * debug race condition * more debug * Update src/zarr/core/array.py --- src/zarr/api/asynchronous.py | 25 ++++++--- src/zarr/api/synchronous.py | 2 +- src/zarr/core/array.py | 59 ++++++++++++++-------- src/zarr/core/chunk_grids.py | 46 ++++++++++++++++- src/zarr/core/group.py | 22 ++++++-- src/zarr/storage/__init__.py | 15 ++++++ src/zarr/{store => storage}/_utils.py | 0 src/zarr/{store => storage}/common.py | 21 ++++---- src/zarr/{store => storage}/local.py | 0 src/zarr/{store => storage}/logging.py | 4 +- src/zarr/{store => storage}/memory.py | 6 +-- src/zarr/{store => storage}/remote.py | 2 +- src/zarr/{store => storage}/zip.py | 4 +- src/zarr/store/__init__.py | 15 ------ src/zarr/testing/buffer.py | 2 +- src/zarr/testing/store.py | 6 +-- src/zarr/testing/strategies.py | 2 +- tests/v3/conftest.py | 4 +- tests/v3/test_api.py | 7 +-- tests/v3/test_array.py | 9 ++-- tests/v3/test_attributes.py | 6 +-- tests/v3/test_buffer.py | 4 +- tests/v3/test_chunk_grids.py | 38 +++++++++++++- tests/v3/test_codecs/test_blosc.py | 2 +- tests/v3/test_codecs/test_codecs.py | 11 +--- tests/v3/test_codecs/test_endian.py | 2 +- tests/v3/test_codecs/test_gzip.py | 2 +- tests/v3/test_codecs/test_sharding.py | 2 +- tests/v3/test_codecs/test_transpose.py | 2 +- tests/v3/test_codecs/test_zstd.py | 2 +- tests/v3/test_group.py | 32 +++++++----- tests/v3/test_indexing.py | 4 +- tests/v3/test_store/test_core.py | 10 ++-- tests/v3/test_store/test_local.py | 2 +- tests/v3/test_store/test_logging.py | 6 +-- tests/v3/test_store/test_memory.py | 2 +- tests/v3/test_store/test_remote.py | 2 +- tests/v3/test_store/test_stateful_store.py | 2 +- tests/v3/test_store/test_zip.py | 2 +- tests/v3/test_sync.py | 2 +- tests/v3/test_v2.py | 2 +- 41 files changed, 255 insertions(+), 133 deletions(-) create mode 100644 src/zarr/storage/__init__.py 
rename src/zarr/{store => storage}/_utils.py (100%) rename src/zarr/{store => storage}/common.py (94%) rename src/zarr/{store => storage}/local.py (100%) rename src/zarr/{store => storage}/logging.py (98%) rename src/zarr/{store => storage}/memory.py (97%) rename src/zarr/{store => storage}/remote.py (99%) rename src/zarr/{store => storage}/zip.py (98%) delete mode 100644 src/zarr/store/__init__.py diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py index 76e65e671..62a973257 100644 --- a/src/zarr/api/asynchronous.py +++ b/src/zarr/api/asynchronous.py @@ -7,14 +7,16 @@ import numpy as np import numpy.typing as npt +from zarr.abc.store import Store from zarr.core.array import Array, AsyncArray, get_array_metadata from zarr.core.common import JSON, AccessModeLiteral, ChunkCoords, MemoryOrder, ZarrFormat from zarr.core.config import config from zarr.core.group import AsyncGroup from zarr.core.metadata.v2 import ArrayV2Metadata from zarr.core.metadata.v3 import ArrayV3Metadata -from zarr.store import ( +from zarr.storage import ( StoreLike, + StorePath, make_store_path, ) @@ -225,6 +227,7 @@ async def open( Return type depends on what exists in the given store. 
""" zarr_format = _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format) + store_path = await make_store_path(store, mode=mode, storage_options=storage_options) if path is not None: @@ -243,9 +246,9 @@ async def open( return await open_group(store=store_path, zarr_format=zarr_format, mode=mode, **kwargs) try: - return await open_array(store=store_path, zarr_format=zarr_format, mode=mode, **kwargs) + return await open_array(store=store_path, zarr_format=zarr_format, **kwargs) except KeyError: - return await open_group(store=store_path, zarr_format=zarr_format, mode=mode, **kwargs) + return await open_group(store=store_path, zarr_format=zarr_format, **kwargs) async def open_consolidated(*args: Any, **kwargs: Any) -> AsyncGroup: @@ -319,7 +322,8 @@ async def save_array( or _default_zarr_version() ) - store_path = await make_store_path(store, mode="w", storage_options=storage_options) + mode = kwargs.pop("mode", None) + store_path = await make_store_path(store, mode=mode, storage_options=storage_options) if path is not None: store_path = store_path / path new = await AsyncArray.create( @@ -496,7 +500,9 @@ async def group( zarr_format = _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format) - store_path = await make_store_path(store, storage_options=storage_options) + mode = None if isinstance(store, Store) else cast(AccessModeLiteral, "a") + + store_path = await make_store_path(store, mode=mode, storage_options=storage_options) if path is not None: store_path = store_path / path @@ -769,7 +775,11 @@ async def create( if meta_array is not None: warnings.warn("meta_array is not yet implemented", RuntimeWarning, stacklevel=2) - mode = kwargs.pop("mode", cast(AccessModeLiteral, "r" if read_only else "w")) + mode = kwargs.pop("mode", None) + if mode is None: + if not isinstance(store, Store | StorePath): + mode = "a" + store_path = await make_store_path(store, mode=mode, storage_options=storage_options) if path is not 
None: store_path = store_path / path @@ -945,7 +955,8 @@ async def open_array( The opened array. """ - store_path = await make_store_path(store, storage_options=storage_options) + mode = kwargs.pop("mode", None) + store_path = await make_store_path(store, mode=mode) if path is not None: store_path = store_path / path diff --git a/src/zarr/api/synchronous.py b/src/zarr/api/synchronous.py index a9f379ef4..f5d614058 100644 --- a/src/zarr/api/synchronous.py +++ b/src/zarr/api/synchronous.py @@ -11,7 +11,7 @@ if TYPE_CHECKING: from zarr.core.buffer import NDArrayLike from zarr.core.common import JSON, AccessModeLiteral, ChunkCoords, ZarrFormat - from zarr.store import StoreLike + from zarr.storage import StoreLike __all__ = [ "array", diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index e1de15c74..9a78297c6 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -3,13 +3,14 @@ import json from asyncio import gather from dataclasses import dataclass, field, replace +from logging import getLogger from typing import TYPE_CHECKING, Any, Literal, cast import numpy as np import numpy.typing as npt from zarr._compat import _deprecate_positional_args -from zarr.abc.store import set_or_delete +from zarr.abc.store import Store, set_or_delete from zarr.codecs import BytesCodec from zarr.codecs._v2 import V2Compressor, V2Filters from zarr.core.attributes import Attributes @@ -19,7 +20,7 @@ NDBuffer, default_buffer_prototype, ) -from zarr.core.chunk_grids import RegularChunkGrid, _guess_chunks +from zarr.core.chunk_grids import RegularChunkGrid, normalize_chunks from zarr.core.chunk_key_encodings import ( ChunkKeyEncoding, DefaultChunkKeyEncoding, @@ -67,10 +68,8 @@ from zarr.core.metadata.v3 import ArrayV3Metadata from zarr.core.sync import collect_aiterator, sync from zarr.registry import get_pipeline_class -from zarr.store import StoreLike, StorePath, make_store_path -from zarr.store.common import ( - ensure_no_existing_node, -) +from zarr.storage import 
StoreLike, make_store_path +from zarr.storage.common import StorePath, ensure_no_existing_node if TYPE_CHECKING: from collections.abc import Iterable, Iterator, Sequence @@ -82,6 +81,8 @@ # Array and AsyncArray are defined in the base ``zarr`` namespace __all__ = ["create_codec_pipeline", "parse_array_metadata"] +logger = getLogger(__name__) + def parse_array_metadata(data: Any) -> ArrayV2Metadata | ArrayV3Metadata: if isinstance(data, ArrayV2Metadata | ArrayV3Metadata): @@ -222,15 +223,14 @@ async def create( shape = parse_shapelike(shape) - if chunk_shape is None: - if chunks is None: - chunk_shape = chunks = _guess_chunks(shape=shape, typesize=np.dtype(dtype).itemsize) - else: - chunks = parse_shapelike(chunks) + if chunks is not None and chunk_shape is not None: + raise ValueError("Only one of chunk_shape or chunks can be provided.") - chunk_shape = chunks - elif chunks is not None: - raise ValueError("Only one of chunk_shape or chunks must be provided.") + dtype = np.dtype(dtype) + if chunks: + _chunks = normalize_chunks(chunks, shape, dtype.itemsize) + else: + _chunks = normalize_chunks(chunk_shape, shape, dtype.itemsize) if zarr_format == 3: if dimension_separator is not None: @@ -253,7 +253,7 @@ async def create( store_path, shape=shape, dtype=dtype, - chunk_shape=chunk_shape, + chunk_shape=_chunks, fill_value=fill_value, chunk_key_encoding=chunk_key_encoding, codecs=codecs, @@ -276,7 +276,7 @@ async def create( store_path, shape=shape, dtype=dtype, - chunks=chunk_shape, + chunks=_chunks, dimension_separator=dimension_separator, fill_value=fill_value, order=order, @@ -404,6 +404,10 @@ async def open( metadata_dict = await get_array_metadata(store_path, zarr_format=zarr_format) return cls(store_path=store_path, metadata=metadata_dict) + @property + def store(self) -> Store: + return self.store_path.store + @property def ndim(self) -> int: return len(self.metadata.shape) @@ -831,6 +835,10 @@ def open( async_array = sync(AsyncArray.open(store)) return 
cls(async_array) + @property + def store(self) -> Store: + return self._async_array.store + @property def ndim(self) -> int: return self._async_array.ndim @@ -2380,15 +2388,26 @@ def chunks_initialized(array: Array | AsyncArray) -> tuple[str, ...]: def _build_parents(node: AsyncArray | AsyncGroup) -> list[AsyncGroup]: from zarr.core.group import AsyncGroup, GroupMetadata - required_parts = node.store_path.path.split("/")[:-1] - parents = [] + store = node.store_path.store + path = node.store_path.path + if not path: + return [] + + required_parts = path.split("/")[:-1] + parents = [ + # the root group + AsyncGroup( + metadata=GroupMetadata(zarr_format=node.metadata.zarr_format), + store_path=StorePath(store=store, path=""), + ) + ] for i, part in enumerate(required_parts): - path = "/".join(required_parts[:i] + [part]) + p = "/".join(required_parts[:i] + [part]) parents.append( AsyncGroup( metadata=GroupMetadata(zarr_format=node.metadata.zarr_format), - store_path=StorePath(store=node.store_path.store, path=path), + store_path=StorePath(store=store, path=p), ) ) diff --git a/src/zarr/core/chunk_grids.py b/src/zarr/core/chunk_grids.py index 46209bd16..77734056b 100644 --- a/src/zarr/core/chunk_grids.py +++ b/src/zarr/core/chunk_grids.py @@ -2,11 +2,12 @@ import itertools import math +import numbers import operator from abc import abstractmethod from dataclasses import dataclass from functools import reduce -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any import numpy as np @@ -97,6 +98,49 @@ def _guess_chunks( return tuple(int(x) for x in chunks) +def normalize_chunks(chunks: Any, shape: tuple[int, ...], typesize: int) -> tuple[int, ...]: + """Convenience function to normalize the `chunks` argument for an array + with the given `shape`.""" + + # N.B., expect shape already normalized + + # handle auto-chunking + if chunks is None or chunks is True: + return _guess_chunks(shape, typesize) + + # handle no chunking + if chunks is False: + return 
shape + + # handle 1D convenience form + if isinstance(chunks, numbers.Integral): + chunks = tuple(int(chunks) for _ in shape) + + # handle dask-style chunks (iterable of iterables) + if all(isinstance(c, (tuple | list)) for c in chunks): + # take first chunk size for each dimension + chunks = tuple( + c[0] for c in chunks + ) # TODO: check/error/warn for irregular chunks (e.g. if c[0] != c[1:-1]) + + # handle bad dimensionality + if len(chunks) > len(shape): + raise ValueError("too many dimensions in chunks") + + # handle underspecified chunks + if len(chunks) < len(shape): + # assume chunks across remaining dimensions + chunks += shape[len(chunks) :] + + # handle None or -1 in chunks + if -1 in chunks or None in chunks: + chunks = tuple( + s if c == -1 or c is None else int(c) for s, c in zip(shape, chunks, strict=False) + ) + + return tuple(int(c) for c in chunks) + + @dataclass(frozen=True) class ChunkGrid(Metadata): @classmethod diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py index 79d03d3fc..18caea3fd 100644 --- a/src/zarr/core/group.py +++ b/src/zarr/core/group.py @@ -29,8 +29,8 @@ ) from zarr.core.config import config from zarr.core.sync import SyncMixin, sync -from zarr.store import StoreLike, StorePath, make_store_path -from zarr.store.common import ensure_no_existing_node +from zarr.storage import StoreLike, make_store_path +from zarr.storage.common import StorePath, ensure_no_existing_node if TYPE_CHECKING: from collections.abc import AsyncGenerator, Generator, Iterable, Iterator @@ -176,7 +176,9 @@ async def open( # alternatively, we could warn and favor v3 raise ValueError("Both zarr.json and .zgroup objects exist") if zarr_json_bytes is None and zgroup_bytes is None: - raise FileNotFoundError(store_path) + raise FileNotFoundError( + f"could not find zarr.json or .zgroup objects in {store_path}" + ) # set zarr_format based on which keys were found if zarr_json_bytes is not None: zarr_format = 3 @@ -698,6 +700,10 @@ async def _members( 
"Object at %s is not recognized as a component of a Zarr hierarchy.", key ) + async def keys(self) -> AsyncGenerator[str, None]: + async for key, _ in self.members(): + yield key + async def contains(self, member: str) -> bool: # TODO: this can be made more efficient. try: @@ -821,15 +827,18 @@ def __delitem__(self, key: str) -> None: self._sync(self._async_group.delitem(key)) def __iter__(self) -> Iterator[str]: - raise NotImplementedError + yield from self.keys() def __len__(self) -> int: - raise NotImplementedError + return self.nmembers() def __setitem__(self, key: str, value: Any) -> None: """__setitem__ is not supported in v3""" raise NotImplementedError + def __repr__(self) -> str: + return f"" + async def update_attributes_async(self, new_attributes: dict[str, Any]) -> Group: new_metadata = replace(self.metadata, attributes=new_attributes) @@ -904,6 +913,9 @@ def members(self, max_depth: int | None = 0) -> tuple[tuple[str, Array | Group], return tuple((kv[0], _parse_async_node(kv[1])) for kv in _members) + def keys(self) -> Generator[str, None]: + yield from self._sync_iter(self._async_group.keys()) + def __contains__(self, member: str) -> bool: return self._sync(self._async_group.contains(member)) diff --git a/src/zarr/storage/__init__.py b/src/zarr/storage/__init__.py new file mode 100644 index 000000000..47f70bcc9 --- /dev/null +++ b/src/zarr/storage/__init__.py @@ -0,0 +1,15 @@ +from zarr.storage.common import StoreLike, StorePath, make_store_path +from zarr.storage.local import LocalStore +from zarr.storage.memory import MemoryStore +from zarr.storage.remote import RemoteStore +from zarr.storage.zip import ZipStore + +__all__ = [ + "LocalStore", + "MemoryStore", + "RemoteStore", + "StoreLike", + "StorePath", + "ZipStore", + "make_store_path", +] diff --git a/src/zarr/store/_utils.py b/src/zarr/storage/_utils.py similarity index 100% rename from src/zarr/store/_utils.py rename to src/zarr/storage/_utils.py diff --git a/src/zarr/store/common.py 
b/src/zarr/storage/common.py similarity index 94% rename from src/zarr/store/common.py rename to src/zarr/storage/common.py index 2d9b1e82c..977fe4ba2 100644 --- a/src/zarr/store/common.py +++ b/src/zarr/storage/common.py @@ -4,12 +4,12 @@ from pathlib import Path from typing import TYPE_CHECKING, Any, Literal -from zarr.abc.store import AccessMode, ByteRangeRequest, Store +from zarr.abc.store import ByteRangeRequest, Store from zarr.core.buffer import Buffer, default_buffer_prototype from zarr.core.common import ZARR_JSON, ZARRAY_JSON, ZGROUP_JSON, ZarrFormat from zarr.errors import ContainsArrayAndGroupError, ContainsArrayError, ContainsGroupError -from zarr.store.local import LocalStore -from zarr.store.memory import MemoryStore +from zarr.storage.local import LocalStore +from zarr.storage.memory import MemoryStore # from zarr.store.remote import RemoteStore @@ -83,13 +83,15 @@ async def make_store_path( mode: AccessModeLiteral | None = None, storage_options: dict[str, Any] | None = None, ) -> StorePath: - from zarr.store.remote import RemoteStore # circular import + from zarr.storage.remote import RemoteStore # circular import used_storage_options = False if isinstance(store_like, StorePath): - if mode is not None: - assert AccessMode.from_literal(mode) == store_like.store.mode + if mode is not None and mode != store_like.store.mode.str: + _store = store_like.store.with_mode(mode) + await _store._ensure_open() + store_like = StorePath(_store) result = store_like elif isinstance(store_like, Store): if mode is not None and mode != store_like.mode.str: @@ -97,9 +99,8 @@ async def make_store_path( await store_like._ensure_open() result = StorePath(store_like) elif store_like is None: - if mode is None: - mode = "w" # exception to the default mode = 'r' - result = StorePath(await MemoryStore.open(mode=mode)) + # mode = "w" is an exception to the default mode = 'r' + result = StorePath(await MemoryStore.open(mode=mode or "w")) elif isinstance(store_like, Path): 
result = StorePath(await LocalStore.open(root=store_like, mode=mode or "r")) elif isinstance(store_like, str): @@ -115,7 +116,7 @@ async def make_store_path( elif isinstance(store_like, dict): # We deliberate only consider dict[str, Buffer] here, and not arbitrary mutable mappings. # By only allowing dictionaries, which are in-memory, we know that MemoryStore appropriate. - result = StorePath(await MemoryStore.open(store_dict=store_like, mode=mode)) + result = StorePath(await MemoryStore.open(store_dict=store_like, mode=mode or "r")) else: msg = f"Unsupported type for store_like: '{type(store_like).__name__}'" # type: ignore[unreachable] raise TypeError(msg) diff --git a/src/zarr/store/local.py b/src/zarr/storage/local.py similarity index 100% rename from src/zarr/store/local.py rename to src/zarr/storage/local.py diff --git a/src/zarr/store/logging.py b/src/zarr/storage/logging.py similarity index 98% rename from src/zarr/store/logging.py rename to src/zarr/storage/logging.py index a9113aabe..52c7c7b84 100644 --- a/src/zarr/store/logging.py +++ b/src/zarr/storage/logging.py @@ -150,9 +150,9 @@ async def set(self, key: str, value: Buffer) -> None: with self.log(): return await self._store.set(key=key, value=value) - async def set_if_not_exists(self, key: str, default: Buffer) -> None: + async def set_if_not_exists(self, key: str, value: Buffer) -> None: with self.log(): - return await self._store.set_if_not_exists(key=key, value=default) + return await self._store.set_if_not_exists(key=key, value=value) async def delete(self, key: str) -> None: with self.log(): diff --git a/src/zarr/store/memory.py b/src/zarr/storage/memory.py similarity index 97% rename from src/zarr/store/memory.py rename to src/zarr/storage/memory.py index 6aec9a6d0..24ea7e004 100644 --- a/src/zarr/store/memory.py +++ b/src/zarr/storage/memory.py @@ -5,7 +5,7 @@ from zarr.abc.store import ByteRangeRequest, Store from zarr.core.buffer import Buffer, gpu from zarr.core.common import concurrent_map 
-from zarr.store._utils import _normalize_interval_index +from zarr.storage._utils import _normalize_interval_index if TYPE_CHECKING: from collections.abc import AsyncGenerator, Iterable, MutableMapping @@ -101,10 +101,10 @@ async def set(self, key: str, value: Buffer, byte_range: tuple[int, int] | None else: self._store_dict[key] = value - async def set_if_not_exists(self, key: str, default: Buffer) -> None: + async def set_if_not_exists(self, key: str, value: Buffer) -> None: self._check_writable() await self._ensure_open() - self._store_dict.setdefault(key, default) + self._store_dict.setdefault(key, value) async def delete(self, key: str) -> None: self._check_writable() diff --git a/src/zarr/store/remote.py b/src/zarr/storage/remote.py similarity index 99% rename from src/zarr/store/remote.py rename to src/zarr/storage/remote.py index 9ef40ae22..9ff779ac7 100644 --- a/src/zarr/store/remote.py +++ b/src/zarr/storage/remote.py @@ -6,7 +6,7 @@ from zarr.abc.store import ByteRangeRequest, Store from zarr.core.buffer import Buffer -from zarr.store.common import _dereference_path +from zarr.storage.common import _dereference_path if TYPE_CHECKING: from collections.abc import AsyncGenerator, Iterable diff --git a/src/zarr/store/zip.py b/src/zarr/storage/zip.py similarity index 98% rename from src/zarr/store/zip.py rename to src/zarr/storage/zip.py index 116d6de83..14d0a4362 100644 --- a/src/zarr/store/zip.py +++ b/src/zarr/storage/zip.py @@ -191,12 +191,12 @@ async def set(self, key: str, value: Buffer) -> None: async def set_partial_values(self, key_start_values: Iterable[tuple[str, int, bytes]]) -> None: raise NotImplementedError - async def set_if_not_exists(self, key: str, default: Buffer) -> None: + async def set_if_not_exists(self, key: str, value: Buffer) -> None: self._check_writable() with self._lock: members = self._zf.namelist() if key not in members: - self._set(key, default) + self._set(key, value) async def delete(self, key: str) -> None: raise 
NotImplementedError diff --git a/src/zarr/store/__init__.py b/src/zarr/store/__init__.py deleted file mode 100644 index dadaf8346..000000000 --- a/src/zarr/store/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -from zarr.store.common import StoreLike, StorePath, make_store_path -from zarr.store.local import LocalStore -from zarr.store.memory import MemoryStore -from zarr.store.remote import RemoteStore -from zarr.store.zip import ZipStore - -__all__ = [ - "LocalStore", - "MemoryStore", - "RemoteStore", - "StoreLike", - "StorePath", - "ZipStore", - "make_store_path", -] diff --git a/src/zarr/testing/buffer.py b/src/zarr/testing/buffer.py index 09e7b33aa..c3694e268 100644 --- a/src/zarr/testing/buffer.py +++ b/src/zarr/testing/buffer.py @@ -7,7 +7,7 @@ import numpy.typing as npt from zarr.core.buffer import Buffer, BufferPrototype, cpu -from zarr.store import MemoryStore +from zarr.storage import MemoryStore if TYPE_CHECKING: from collections.abc import Iterable diff --git a/src/zarr/testing/store.py b/src/zarr/testing/store.py index 5495e6fdf..853064c60 100644 --- a/src/zarr/testing/store.py +++ b/src/zarr/testing/store.py @@ -6,8 +6,8 @@ from zarr.abc.store import AccessMode, Store from zarr.core.buffer import Buffer, default_buffer_prototype from zarr.core.common import AccessModeLiteral -from zarr.core.sync import _collect_aiterator, collect_aiterator -from zarr.store._utils import _normalize_interval_index +from zarr.core.sync import _collect_aiterator +from zarr.storage._utils import _normalize_interval_index from zarr.testing.utils import assert_bytes_equal __all__ = ["StoreTests"] @@ -120,7 +120,7 @@ async def test_get_many(self, store: S) -> None: values = tuple(f"{k}".encode() for k in keys) for k, v in zip(keys, values, strict=False): self.set(store, k, self.buffer_cls.from_bytes(v)) - observed_buffers = collect_aiterator( + observed_buffers = await _collect_aiterator( store._get_many( zip( keys, diff --git a/src/zarr/testing/strategies.py 
b/src/zarr/testing/strategies.py index ac5850323..234454e28 100644 --- a/src/zarr/testing/strategies.py +++ b/src/zarr/testing/strategies.py @@ -8,7 +8,7 @@ from zarr.core.array import Array from zarr.core.group import Group -from zarr.store import MemoryStore, StoreLike +from zarr.storage import MemoryStore, StoreLike # Copied from Xarray _attr_keys = st.text(st.characters(), min_size=1) diff --git a/tests/v3/conftest.py b/tests/v3/conftest.py index 87830f11f..ad3552ad6 100644 --- a/tests/v3/conftest.py +++ b/tests/v3/conftest.py @@ -12,8 +12,8 @@ from zarr import AsyncGroup, config from zarr.abc.store import Store from zarr.core.sync import sync -from zarr.store import LocalStore, MemoryStore, StorePath, ZipStore -from zarr.store.remote import RemoteStore +from zarr.storage import LocalStore, MemoryStore, StorePath, ZipStore +from zarr.storage.remote import RemoteStore if TYPE_CHECKING: from collections.abc import Generator diff --git a/tests/v3/test_api.py b/tests/v3/test_api.py index 0717d542c..218aec5c9 100644 --- a/tests/v3/test_api.py +++ b/tests/v3/test_api.py @@ -10,7 +10,7 @@ from zarr.abc.store import Store from zarr.api.synchronous import create, group, load, open, open_group, save, save_array, save_group from zarr.core.common import ZarrFormat -from zarr.store.memory import MemoryStore +from zarr.storage.memory import MemoryStore def test_create_array(memory_store: Store) -> None: @@ -43,8 +43,9 @@ async def test_open_array(memory_store: MemoryStore) -> None: assert z.shape == (100,) # open array, overwrite - store._store_dict = {} - z = open(store=store, shape=200, mode="w") # mode="w" + # store._store_dict = {} + store = MemoryStore(mode="w") + z = open(store=store, shape=200) assert isinstance(z, Array) assert z.shape == (200,) diff --git a/tests/v3/test_array.py b/tests/v3/test_array.py index 5de4a4d12..5778f7e8f 100644 --- a/tests/v3/test_array.py +++ b/tests/v3/test_array.py @@ -15,8 +15,8 @@ from zarr.core.indexing import ceildiv from 
zarr.core.sync import sync from zarr.errors import ContainsArrayError, ContainsGroupError -from zarr.store import LocalStore, MemoryStore -from zarr.store.common import StorePath +from zarr.storage import LocalStore, MemoryStore +from zarr.storage.common import StorePath @pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"]) @@ -92,10 +92,9 @@ async def test_create_creates_parents( expected = [f"{part}/{file}" for file in files for part in parts] if zarr_format == 2: - expected.append("a/b/c/d/.zarray") - expected.append("a/b/c/d/.zattrs") + expected.extend([".zattrs", ".zgroup", "a/b/c/d/.zarray", "a/b/c/d/.zattrs"]) else: - expected.append("a/b/c/d/zarr.json") + expected.extend(["zarr.json", "a/b/c/d/zarr.json"]) expected = sorted(expected) diff --git a/tests/v3/test_attributes.py b/tests/v3/test_attributes.py index 14c60492b..12097eb2b 100644 --- a/tests/v3/test_attributes.py +++ b/tests/v3/test_attributes.py @@ -1,10 +1,10 @@ import zarr.core import zarr.core.attributes -import zarr.store +import zarr.storage def test_put() -> None: - store = zarr.store.MemoryStore({}, mode="w") + store = zarr.storage.MemoryStore({}, mode="w") attrs = zarr.core.attributes.Attributes( zarr.Group.from_store(store, attributes={"a": 1, "b": 2}) ) @@ -14,7 +14,7 @@ def test_put() -> None: def test_asdict() -> None: - store = zarr.store.MemoryStore({}, mode="w") + store = zarr.storage.MemoryStore({}, mode="w") attrs = zarr.core.attributes.Attributes( zarr.Group.from_store(store, attributes={"a": 1, "b": 2}) ) diff --git a/tests/v3/test_buffer.py b/tests/v3/test_buffer.py index cde3f8578..60816d764 100644 --- a/tests/v3/test_buffer.py +++ b/tests/v3/test_buffer.py @@ -13,8 +13,8 @@ from zarr.codecs.transpose import TransposeCodec from zarr.codecs.zstd import ZstdCodec from zarr.core.buffer import ArrayLike, BufferPrototype, NDArrayLike, cpu, gpu -from zarr.store.common import StorePath -from zarr.store.memory import MemoryStore +from zarr.storage.common 
import StorePath +from zarr.storage.memory import MemoryStore from zarr.testing.buffer import ( NDBufferUsingTestNDArrayLike, StoreExpectingTestBuffer, diff --git a/tests/v3/test_chunk_grids.py b/tests/v3/test_chunk_grids.py index 12166bd21..4c69c483a 100644 --- a/tests/v3/test_chunk_grids.py +++ b/tests/v3/test_chunk_grids.py @@ -1,7 +1,9 @@ +from typing import Any + import numpy as np import pytest -from zarr.core.chunk_grids import _guess_chunks +from zarr.core.chunk_grids import _guess_chunks, normalize_chunks @pytest.mark.parametrize( @@ -16,3 +18,37 @@ def test_guess_chunks(shape: tuple[int, ...], itemsize: int) -> None: assert chunk_size < (64 * 1024 * 1024) # doesn't make any sense to allow chunks to have zero length dimension assert all(0 < c <= max(s, 1) for c, s in zip(chunks, shape, strict=False)) + + +@pytest.mark.parametrize( + ("chunks", "shape", "typesize", "expected"), + [ + ((10,), (100,), 1, (10,)), + ([10], (100,), 1, (10,)), + (10, (100,), 1, (10,)), + ((10, 10), (100, 10), 1, (10, 10)), + (10, (100, 10), 1, (10, 10)), + ((10, None), (100, 10), 1, (10, 10)), + (30, (100, 20, 10), 1, (30, 30, 30)), + ((30,), (100, 20, 10), 1, (30, 20, 10)), + ((30, None), (100, 20, 10), 1, (30, 20, 10)), + ((30, None, None), (100, 20, 10), 1, (30, 20, 10)), + ((30, 20, None), (100, 20, 10), 1, (30, 20, 10)), + ((30, 20, 10), (100, 20, 10), 1, (30, 20, 10)), + # auto chunking + (None, (100,), 1, (100,)), + (-1, (100,), 1, (100,)), + ((30, -1, None), (100, 20, 10), 1, (30, 20, 10)), + ], +) +def test_normalize_chunks( + chunks: Any, shape: tuple[int, ...], typesize: int, expected: tuple[int, ...] 
+) -> None: + assert expected == normalize_chunks(chunks, shape, typesize) + + +def test_normalize_chunks_errors() -> None: + with pytest.raises(ValueError): + normalize_chunks("foo", (100,), 1) + with pytest.raises(ValueError): + normalize_chunks((100, 10), (100,), 1) diff --git a/tests/v3/test_codecs/test_blosc.py b/tests/v3/test_codecs/test_blosc.py index 982b0213b..416a2f784 100644 --- a/tests/v3/test_codecs/test_blosc.py +++ b/tests/v3/test_codecs/test_blosc.py @@ -7,7 +7,7 @@ from zarr.abc.store import Store from zarr.codecs import BloscCodec, BytesCodec, ShardingCodec from zarr.core.buffer import default_buffer_prototype -from zarr.store.common import StorePath +from zarr.storage.common import StorePath @pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) diff --git a/tests/v3/test_codecs/test_codecs.py b/tests/v3/test_codecs/test_codecs.py index 75b1d15d0..7a5fb979a 100644 --- a/tests/v3/test_codecs/test_codecs.py +++ b/tests/v3/test_codecs/test_codecs.py @@ -16,7 +16,7 @@ ) from zarr.core.buffer import default_buffer_prototype from zarr.core.indexing import Selection, morton_order_iter -from zarr.store import StorePath +from zarr.storage import StorePath if TYPE_CHECKING: from zarr.abc.codec import Codec @@ -274,15 +274,6 @@ async def test_dimension_names(store: Store) -> None: @pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) def test_invalid_metadata(store: Store) -> None: - spath = StorePath(store, "invalid_metadata") - with pytest.raises(ValueError): - Array.create( - spath, - shape=(16, 16, 16), - chunk_shape=(16, 16), - dtype=np.dtype("uint8"), - fill_value=0, - ) spath2 = StorePath(store, "invalid_endian") with pytest.raises(TypeError): Array.create( diff --git a/tests/v3/test_codecs/test_endian.py b/tests/v3/test_codecs/test_endian.py index 81b24e734..db4e77451 100644 --- a/tests/v3/test_codecs/test_endian.py +++ b/tests/v3/test_codecs/test_endian.py @@ -6,7 +6,7 @@ from zarr import AsyncArray from 
zarr.abc.store import Store from zarr.codecs import BytesCodec -from zarr.store.common import StorePath +from zarr.storage.common import StorePath from .test_codecs import _AsyncArrayProxy diff --git a/tests/v3/test_codecs/test_gzip.py b/tests/v3/test_codecs/test_gzip.py index 277cbd9e4..7b4d23181 100644 --- a/tests/v3/test_codecs/test_gzip.py +++ b/tests/v3/test_codecs/test_gzip.py @@ -4,7 +4,7 @@ from zarr import Array from zarr.abc.store import Store from zarr.codecs import BytesCodec, GzipCodec -from zarr.store.common import StorePath +from zarr.storage.common import StorePath @pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) diff --git a/tests/v3/test_codecs/test_sharding.py b/tests/v3/test_codecs/test_sharding.py index ecf2ea7bd..c0dcfbf35 100644 --- a/tests/v3/test_codecs/test_sharding.py +++ b/tests/v3/test_codecs/test_sharding.py @@ -15,7 +15,7 @@ TransposeCodec, ) from zarr.core.buffer import default_buffer_prototype -from zarr.store.common import StorePath +from zarr.storage.common import StorePath from ..conftest import ArrayRequest from .test_codecs import _AsyncArrayProxy, order_from_dim diff --git a/tests/v3/test_codecs/test_transpose.py b/tests/v3/test_codecs/test_transpose.py index a14ace720..2b3914150 100644 --- a/tests/v3/test_codecs/test_transpose.py +++ b/tests/v3/test_codecs/test_transpose.py @@ -7,7 +7,7 @@ from zarr.abc.store import Store from zarr.codecs import BytesCodec, ShardingCodec, TransposeCodec from zarr.core.common import MemoryOrder -from zarr.store.common import StorePath +from zarr.storage.common import StorePath from .test_codecs import _AsyncArrayProxy diff --git a/tests/v3/test_codecs/test_zstd.py b/tests/v3/test_codecs/test_zstd.py index cf80a8053..29efc2946 100644 --- a/tests/v3/test_codecs/test_zstd.py +++ b/tests/v3/test_codecs/test_zstd.py @@ -4,7 +4,7 @@ from zarr import Array from zarr.abc.store import Store from zarr.codecs import BytesCodec, ZstdCodec -from zarr.store.common import StorePath 
+from zarr.storage.common import StorePath @pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) diff --git a/tests/v3/test_group.py b/tests/v3/test_group.py index cb1681daa..15b9658e4 100644 --- a/tests/v3/test_group.py +++ b/tests/v3/test_group.py @@ -15,8 +15,8 @@ from zarr.core.group import GroupMetadata from zarr.core.sync import sync from zarr.errors import ContainsArrayError, ContainsGroupError -from zarr.store import LocalStore, MemoryStore, StorePath -from zarr.store.common import make_store_path +from zarr.storage import LocalStore, MemoryStore, StorePath +from zarr.storage.common import make_store_path from .conftest import parse_store @@ -62,6 +62,19 @@ async def test_create_creates_parents(store: Store, zarr_format: ZarrFormat) -> await zarr.api.asynchronous.open_group( store=store, path="a", zarr_format=zarr_format, attributes={"key": "value"} ) + objs = {x async for x in store.list()} + if zarr_format == 2: + assert objs == {".zgroup", ".zattrs", "a/.zgroup", "a/.zattrs"} + else: + assert objs == {"zarr.json", "a/zarr.json"} + + # test that root group node was created + root = await zarr.api.asynchronous.open_group( + store=store, + ) + agroup = await root.getitem("a") + assert agroup.attrs == {"key": "value"} + # create a child node with a couple intermediates await zarr.api.asynchronous.open_group(store=store, path="a/b/c/d", zarr_format=zarr_format) parts = ["a", "a/b", "a/b/c"] @@ -74,10 +87,9 @@ async def test_create_creates_parents(store: Store, zarr_format: ZarrFormat) -> expected = [f"{part}/{file}" for file in files for part in parts] if zarr_format == 2: - expected.append("a/b/c/d/.zgroup") - expected.append("a/b/c/d/.zattrs") + expected.extend([".zgroup", ".zattrs", "a/b/c/d/.zgroup", "a/b/c/d/.zattrs"]) else: - expected.append("a/b/c/d/zarr.json") + expected.extend(["zarr.json", "a/b/c/d/zarr.json"]) expected = sorted(expected) @@ -230,9 +242,7 @@ def test_group_create(store: Store, exists_ok: bool, zarr_format: 
ZarrFormat) -> if not exists_ok: with pytest.raises(ContainsGroupError): - group = Group.from_store( - store, attributes=attributes, exists_ok=exists_ok, zarr_format=zarr_format - ) + _ = Group.from_store(store, exists_ok=exists_ok, zarr_format=zarr_format) def test_group_open(store: Store, zarr_format: ZarrFormat, exists_ok: bool) -> None: @@ -311,8 +321,7 @@ def test_group_iter(store: Store, zarr_format: ZarrFormat) -> None: """ group = Group.from_store(store, zarr_format=zarr_format) - with pytest.raises(NotImplementedError): - list(group) + assert list(group) == [] def test_group_len(store: Store, zarr_format: ZarrFormat) -> None: @@ -321,8 +330,7 @@ def test_group_len(store: Store, zarr_format: ZarrFormat) -> None: """ group = Group.from_store(store, zarr_format=zarr_format) - with pytest.raises(NotImplementedError): - len(group) + assert len(group) == 0 def test_group_setitem(store: Store, zarr_format: ZarrFormat) -> None: diff --git a/tests/v3/test_indexing.py b/tests/v3/test_indexing.py index 59169c67b..ce01c85b1 100644 --- a/tests/v3/test_indexing.py +++ b/tests/v3/test_indexing.py @@ -25,8 +25,8 @@ replace_ellipsis, ) from zarr.registry import get_ndbuffer_class -from zarr.store.common import StorePath -from zarr.store.memory import MemoryStore +from zarr.storage.common import StorePath +from zarr.storage.memory import MemoryStore if TYPE_CHECKING: from collections.abc import AsyncGenerator diff --git a/tests/v3/test_store/test_core.py b/tests/v3/test_store/test_core.py index f40149112..b2a8292ea 100644 --- a/tests/v3/test_store/test_core.py +++ b/tests/v3/test_store/test_core.py @@ -3,10 +3,10 @@ import pytest -from zarr.store.common import StoreLike, StorePath, make_store_path -from zarr.store.local import LocalStore -from zarr.store.memory import MemoryStore -from zarr.store.remote import RemoteStore +from zarr.storage.common import StoreLike, StorePath, make_store_path +from zarr.storage.local import LocalStore +from zarr.storage.memory import 
MemoryStore +from zarr.storage.remote import RemoteStore async def test_make_store_path(tmpdir: str) -> None: @@ -59,7 +59,7 @@ async def test_make_store_path_fsspec(monkeypatch) -> None: ) async def test_make_store_path_storage_options_raises(store_like: StoreLike) -> None: with pytest.raises(TypeError, match="storage_options"): - await make_store_path(store_like, storage_options={"foo": "bar"}, mode="w") + await make_store_path(store_like, storage_options={"foo": "bar"}) async def test_unsupported() -> None: diff --git a/tests/v3/test_store/test_local.py b/tests/v3/test_store/test_local.py index bdd909c28..1fd466b39 100644 --- a/tests/v3/test_store/test_local.py +++ b/tests/v3/test_store/test_local.py @@ -3,7 +3,7 @@ import pytest from zarr.core.buffer import Buffer, cpu -from zarr.store.local import LocalStore +from zarr.storage.local import LocalStore from zarr.testing.store import StoreTests diff --git a/tests/v3/test_store/test_logging.py b/tests/v3/test_store/test_logging.py index b03c9b94f..0258244c5 100644 --- a/tests/v3/test_store/test_logging.py +++ b/tests/v3/test_store/test_logging.py @@ -5,9 +5,9 @@ import pytest import zarr -import zarr.store +import zarr.storage from zarr.core.buffer import default_buffer_prototype -from zarr.store.logging import LoggingStore +from zarr.storage.logging import LoggingStore if TYPE_CHECKING: from zarr.abc.store import Store @@ -52,7 +52,7 @@ async def test_logging_store_counter(store: Store) -> None: async def test_with_mode(): - wrapped = LoggingStore(store=zarr.store.MemoryStore(mode="w"), log_level="INFO") + wrapped = LoggingStore(store=zarr.storage.MemoryStore(mode="w"), log_level="INFO") new = wrapped.with_mode(mode="r") assert new.mode.str == "r" assert new.log_level == "INFO" diff --git a/tests/v3/test_store/test_memory.py b/tests/v3/test_store/test_memory.py index efb61b332..092aad2ba 100644 --- a/tests/v3/test_store/test_memory.py +++ b/tests/v3/test_store/test_memory.py @@ -3,7 +3,7 @@ import pytest from 
zarr.core.buffer import Buffer, cpu, gpu -from zarr.store.memory import GpuMemoryStore, MemoryStore +from zarr.storage.memory import GpuMemoryStore, MemoryStore from zarr.testing.store import StoreTests from zarr.testing.utils import gpu_test diff --git a/tests/v3/test_store/test_remote.py b/tests/v3/test_store/test_remote.py index 18ba1e6d1..93a0d60df 100644 --- a/tests/v3/test_store/test_remote.py +++ b/tests/v3/test_store/test_remote.py @@ -12,7 +12,7 @@ import zarr.api.asynchronous from zarr.core.buffer import Buffer, cpu, default_buffer_prototype from zarr.core.sync import _collect_aiterator, sync -from zarr.store import RemoteStore +from zarr.storage import RemoteStore from zarr.testing.store import StoreTests if TYPE_CHECKING: diff --git a/tests/v3/test_store/test_stateful_store.py b/tests/v3/test_store/test_stateful_store.py index efa1953a5..9ac3bbc3f 100644 --- a/tests/v3/test_store/test_stateful_store.py +++ b/tests/v3/test_store/test_stateful_store.py @@ -16,7 +16,7 @@ import zarr from zarr.abc.store import AccessMode, Store from zarr.core.buffer import BufferPrototype, cpu, default_buffer_prototype -from zarr.store import LocalStore, ZipStore +from zarr.storage import LocalStore, ZipStore from zarr.testing.strategies import key_ranges from zarr.testing.strategies import keys as zarr_keys diff --git a/tests/v3/test_store/test_zip.py b/tests/v3/test_store/test_zip.py index e99b921be..a99f9b95b 100644 --- a/tests/v3/test_store/test_zip.py +++ b/tests/v3/test_store/test_zip.py @@ -10,7 +10,7 @@ import zarr from zarr.abc.store import AccessMode from zarr.core.buffer import Buffer, cpu, default_buffer_prototype -from zarr.store.zip import ZipStore +from zarr.storage.zip import ZipStore from zarr.testing.store import StoreTests if TYPE_CHECKING: diff --git a/tests/v3/test_sync.py b/tests/v3/test_sync.py index 864c9e01c..081e65f3d 100644 --- a/tests/v3/test_sync.py +++ b/tests/v3/test_sync.py @@ -6,7 +6,7 @@ import zarr from zarr.core.sync import SyncError, 
SyncMixin, _get_lock, _get_loop, sync -from zarr.store.memory import MemoryStore +from zarr.storage.memory import MemoryStore @pytest.fixture(params=[True, False]) diff --git a/tests/v3/test_v2.py b/tests/v3/test_v2.py index 943c425f5..95a5f6660 100644 --- a/tests/v3/test_v2.py +++ b/tests/v3/test_v2.py @@ -7,7 +7,7 @@ import zarr from zarr import Array -from zarr.store import MemoryStore, StorePath +from zarr.storage import MemoryStore, StorePath @pytest.fixture