Commit 76c7815: Merge branch 'v3' into feature/logging-store

jhamman authored Sep 26, 2024
2 parents 3d79a4e + e968ac5
Showing 48 changed files with 327 additions and 279 deletions.
6 changes: 3 additions & 3 deletions .github/ISSUE_TEMPLATE/config.yml
@@ -3,9 +3,9 @@ contact_links:
   - name: ✨ Propose a new major feature
     url: https://github.com/zarr-developers/zarr-specs
     about: A new major feature should be discussed in the Zarr specifications repository.
-  - name: ❓ Discuss something on gitter
-    url: https://gitter.im/zarr-developers/community
-    about: For questions like "How do I do X with Zarr?", you can move to our Gitter channel.
+  - name: ❓ Discuss something on ZulipChat
+    url: https://ossci.zulipchat.com/
+    about: For questions like "How do I do X with Zarr?", you can move to our ZulipChat.
   - name: ❓ Discuss something on GitHub Discussions
     url: https://github.com/zarr-developers/zarr-python/discussions
     about: For questions like "How do I do X with Zarr?", you can move to GitHub Discussions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -84,3 +84,5 @@ fixture/
 .DS_Store
 tests/.hypothesis
 .hypothesis/
+
+zarr/version.py
2 changes: 1 addition & 1 deletion bench/compress_normal.py
@@ -16,7 +16,7 @@
         a,
         chunks=1000000,
         compression="blosc",
-        compression_opts=dict(cname="lz4", clevel=5, shuffle=2),
+        compression_opts={"cname": "lz4", "clevel": 5, "shuffle": 2},
     )
     print(z)
37 changes: 32 additions & 5 deletions pyproject.toml
@@ -207,18 +207,45 @@ extend-exclude = [

 [tool.ruff.lint]
 extend-select = [
-    "B",  # flake8-bugbear
-    "I",  # isort
-    "ISC",
-    "UP",  # pyupgrade
-    "RSE",
+    "B",  # flake8-bugbear
+    "C4",  # flake8-comprehensions
+    "FLY",  # flynt
+    "I",  # isort
+    "ISC",  # flake8-implicit-str-concat
+    "PGH",  # pygrep-hooks
+    "PT",  # flake8-pytest-style
+    "PYI",  # flake8-pyi
+    "RSE",  # flake8-raise
+    "RET",  # flake8-return
     "RUF",
+    "TCH",  # flake8-type-checking
+    "TRY",  # tryceratops
+    "UP",  # pyupgrade
 ]
 ignore = [
+    "PT004",  # deprecated
+    "PT011",  # TODO: apply this rule
+    "PT012",  # TODO: apply this rule
+    "PYI013",
+    "RET505",
+    "RET506",
     "RUF005",
+    "TRY003",
     # https://docs.astral.sh/ruff/formatter/#conflicting-lint-rules
+    "W191",
+    "E111",
+    "E114",
+    "E117",
+    "D206",
+    "D300",
+    "Q000",
+    "Q001",
+    "Q002",
+    "Q003",
+    "COM812",
+    "COM819",
     "ISC001",
     "ISC002",
 ]

 [tool.mypy]
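Many of the smaller code changes in this commit are mechanical fixes for the newly enabled rule families above. A minimal sketch of the rewrites the C4 (flake8-comprehensions) rules suggest, using hypothetical data:

    offsets = [(0, 10), (16, 4)]  # hypothetical data, for illustration only

    # C401: a generator passed to set() becomes a set comprehension.
    unique_offsets = {start for start, _ in offsets}

    # C408: a dict() call with keyword arguments becomes a dict literal.
    opts = {"cname": "lz4", "clevel": 5, "shuffle": 2}

    # C409: a list comprehension passed to tuple() becomes a generator argument.
    lengths = tuple(length for _, length in offsets)

The compress_normal.py hunk above and the sharding.py hunks below apply exactly these patterns.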
12 changes: 9 additions & 3 deletions src/zarr/abc/store.py
@@ -1,6 +1,7 @@
 from abc import ABC, abstractmethod
 from asyncio import gather
 from collections.abc import AsyncGenerator, Iterable
+from types import TracebackType
 from typing import Any, NamedTuple, Protocol, runtime_checkable

 from typing_extensions import Self

@@ -35,7 +36,7 @@ class Store(ABC):
     _mode: AccessMode
     _is_open: bool

-    def __init__(self, mode: AccessModeLiteral = "r", *args: Any, **kwargs: Any):
+    def __init__(self, mode: AccessModeLiteral = "r", *args: Any, **kwargs: Any) -> None:
         self._is_open = False
         self._mode = AccessMode.from_literal(mode)

@@ -49,7 +50,12 @@ def __enter__(self) -> Self:
         """Enter a context manager that will close the store upon exiting."""
         return self

-    def __exit__(self, *args: Any) -> None:
+    def __exit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc_value: BaseException | None,
+        traceback: TracebackType | None,
+    ) -> None:
         """Close the store."""
         self.close()

@@ -164,7 +170,7 @@ async def _set_many(self, values: Iterable[tuple[str, Buffer]]) -> None:
         Insert multiple (key, value) pairs into storage.
         """
         await gather(*(self.set(key, value) for key, value in values))
-        return None
+        return

     @property
     @abstractmethod
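The widened `__exit__` signature spells out the standard context-manager protocol instead of collapsing it into `*args: Any`, which lets type checkers verify call sites. A minimal sketch of the same pattern outside Zarr (the class is hypothetical):

    from types import TracebackType

    from typing_extensions import Self


    class Resource:
        def __enter__(self) -> Self:
            return self

        def __exit__(
            self,
            exc_type: type[BaseException] | None,
            exc_value: BaseException | None,
            traceback: TracebackType | None,
        ) -> None:
            # Returning None (rather than True) lets an in-flight exception
            # propagate after cleanup runs.
            pass

The bare `return` in `_set_many` follows the flake8-return rules enabled above: in a function annotated `-> None`, `return None` is redundant.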
10 changes: 5 additions & 5 deletions src/zarr/api/asynchronous.py
@@ -2,7 +2,7 @@

 import asyncio
 import warnings
-from typing import TYPE_CHECKING, Any, Literal, Union, cast
+from typing import TYPE_CHECKING, Any, Literal, cast

 import numpy as np
 import numpy.typing as npt

@@ -25,6 +25,10 @@
 from zarr.core.buffer import NDArrayLike
 from zarr.core.chunk_key_encodings import ChunkKeyEncoding

+# TODO: this type could use some more thought
+ArrayLike = AsyncArray | Array | npt.NDArray[Any]
+PathLike = str
+
 __all__ = [
     "consolidate_metadata",
     "copy",

@@ -53,10 +57,6 @@
     "zeros_like",
 ]

-# TODO: this type could use some more thought, noqa to avoid "Variable "asynchronous.ArrayLike" is not valid as a type"
-ArrayLike = Union[AsyncArray | Array | npt.NDArray[Any]]  # noqa
-PathLike = str
-

 def _get_shape_chunks(a: ArrayLike | Any) -> tuple[ChunkCoords | None, ChunkCoords | None]:
     """helper function to get the shape and chunks from an array-like object"""
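The relocated `ArrayLike` alias also drops a redundant wrapper: the `|` operator (PEP 604) already constructs a `typing.Union`, so `Union[AsyncArray | Array | npt.NDArray[Any]]` was a no-op around the inner union, and the `noqa` goes away with it. A quick check of the equivalence:

    from typing import Union

    # PEP 604 unions and typing.Union produce the same type object.
    assert (int | str) == Union[int, str]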
12 changes: 6 additions & 6 deletions src/zarr/codecs/_v2.py
@@ -8,16 +8,18 @@

 from zarr.abc.codec import ArrayArrayCodec, ArrayBytesCodec
 from zarr.core.buffer import Buffer, NDBuffer, default_buffer_prototype
-from zarr.core.common import JSON, to_thread
+from zarr.core.common import to_thread
 from zarr.registry import get_ndbuffer_class

 if TYPE_CHECKING:
+    import numcodecs.abc
+
     from zarr.core.array_spec import ArraySpec


 @dataclass(frozen=True)
 class V2Compressor(ArrayBytesCodec):
-    compressor: dict[str, JSON] | None
+    compressor: numcodecs.abc.Codec | None

     is_fixed_size = False

@@ -27,9 +29,8 @@ async def _decode_single(
         chunk_spec: ArraySpec,
     ) -> NDBuffer:
         if self.compressor is not None:
-            compressor = numcodecs.get_codec(self.compressor)
             chunk_numpy_array = ensure_ndarray(
-                await to_thread(compressor.decode, chunk_bytes.as_array_like())
+                await to_thread(self.compressor.decode, chunk_bytes.as_array_like())
             )
         else:
             chunk_numpy_array = ensure_ndarray(chunk_bytes.as_array_like())

@@ -47,14 +48,13 @@ async def _encode_single(
     ) -> Buffer | None:
         chunk_numpy_array = chunk_array.as_numpy_array()
         if self.compressor is not None:
-            compressor = numcodecs.get_codec(self.compressor)
             if (
                 not chunk_numpy_array.flags.c_contiguous
                 and not chunk_numpy_array.flags.f_contiguous
             ):
                 chunk_numpy_array = chunk_numpy_array.copy(order="A")
             encoded_chunk_bytes = ensure_bytes(
-                await to_thread(compressor.encode, chunk_numpy_array)
+                await to_thread(self.compressor.encode, chunk_numpy_array)
             )
         else:
             encoded_chunk_bytes = ensure_bytes(chunk_numpy_array)
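`V2Compressor` now stores a resolved `numcodecs.abc.Codec` rather than its JSON config, so `numcodecs.get_codec` runs once at construction instead of once per chunk. A sketch of the underlying numcodecs pattern (the config values are arbitrary):

    import numcodecs

    # Resolve the codec once from a v2-style config dict...
    codec = numcodecs.get_codec({"id": "blosc", "cname": "lz4", "clevel": 5, "shuffle": 2})

    # ...then reuse the instance for every chunk.
    compressed = codec.encode(bytes(1000))
    restored = codec.decode(compressed)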
8 changes: 4 additions & 4 deletions src/zarr/codecs/sharding.py
@@ -151,7 +151,7 @@ def is_dense(self, chunk_byte_length: int) -> bool:

         # Are all non-empty offsets unique?
         if len(
-            set(offset for offset, _ in sorted_offsets_and_lengths if offset != MAX_UINT_64)
+            {offset for offset, _ in sorted_offsets_and_lengths if offset != MAX_UINT_64}
         ) != len(sorted_offsets_and_lengths):
             return False

@@ -380,8 +380,8 @@ def to_dict(self) -> dict[str, JSON]:
             "name": "sharding_indexed",
             "configuration": {
                 "chunk_shape": self.chunk_shape,
-                "codecs": tuple([s.to_dict() for s in self.codecs]),
-                "index_codecs": tuple([s.to_dict() for s in self.index_codecs]),
+                "codecs": tuple(s.to_dict() for s in self.codecs),
+                "index_codecs": tuple(s.to_dict() for s in self.index_codecs),
                 "index_location": self.index_location.value,
             },
         }

@@ -477,7 +477,7 @@ async def _decode_partial_single(
         )

         indexed_chunks = list(indexer)
-        all_chunk_coords = set(chunk_coords for chunk_coords, _, _ in indexed_chunks)
+        all_chunk_coords = {chunk_coords for chunk_coords, _, _ in indexed_chunks}

         # reading bytes of all requested chunks
         shard_dict: ShardMapping = {}
6 changes: 2 additions & 4 deletions src/zarr/codecs/transpose.py
@@ -96,16 +96,14 @@ async def _decode_single(
         chunk_spec: ArraySpec,
     ) -> NDBuffer:
         inverse_order = np.argsort(self.order)
-        chunk_array = chunk_array.transpose(inverse_order)
-        return chunk_array
+        return chunk_array.transpose(inverse_order)

     async def _encode_single(
         self,
         chunk_array: NDBuffer,
         _chunk_spec: ArraySpec,
     ) -> NDBuffer | None:
-        chunk_array = chunk_array.transpose(self.order)
-        return chunk_array
+        return chunk_array.transpose(self.order)

     def compute_encoded_size(self, input_byte_length: int, _chunk_spec: ArraySpec) -> int:
         return input_byte_length
15 changes: 3 additions & 12 deletions src/zarr/core/array.py
@@ -9,7 +9,6 @@
 import numpy.typing as npt

 from zarr._compat import _deprecate_positional_args
-from zarr.abc.codec import Codec, CodecPipeline
 from zarr.abc.store import set_or_delete
 from zarr.codecs import BytesCodec
 from zarr.codecs._v2 import V2Compressor, V2Filters

@@ -110,7 +109,7 @@ def __init__(
         metadata: ArrayMetadata,
         store_path: StorePath,
         order: Literal["C", "F"] | None = None,
-    ):
+    ) -> None:
         metadata_parsed = parse_array_metadata(metadata)
         order_parsed = parse_indexing_order(order or config.get("array.order"))

@@ -252,12 +251,6 @@ async def _create_v3(
         shape = parse_shapelike(shape)
         codecs = list(codecs) if codecs is not None else [BytesCodec()]

-        if fill_value is None:
-            if dtype == np.dtype("bool"):
-                fill_value = False
-            else:
-                fill_value = 0
-
         if chunk_key_encoding is None:
             chunk_key_encoding = ("default", "/")
         assert chunk_key_encoding is not None

@@ -281,7 +274,6 @@
         )

         array = cls(metadata=metadata, store_path=store_path)
-
         await array._save_metadata(metadata)
         return array

@@ -294,7 +286,7 @@ async def _create_v2(
         dtype: npt.DTypeLike,
         chunks: ChunkCoords,
         dimension_separator: Literal[".", "/"] | None = None,
-        fill_value: None | int | float = None,
+        fill_value: None | float = None,
         order: Literal["C", "F"] | None = None,
         filters: list[dict[str, JSON]] | None = None,
         compressor: dict[str, JSON] | None = None,

@@ -331,8 +323,7 @@ def from_dict(
         data: dict[str, JSON],
     ) -> AsyncArray:
         metadata = parse_array_metadata(data)
-        async_array = cls(metadata=metadata, store_path=store_path)
-        return async_array
+        return cls(metadata=metadata, store_path=store_path)

     @classmethod
     async def open(
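Narrowing `fill_value` from `None | int | float` to `None | float` loses no expressiveness: under PEP 484's numeric tower, type checkers accept an `int` wherever a `float` is expected, so the `int` member was redundant. A tiny sketch (the function is hypothetical):

    def create(fill_value: None | float = None) -> None: ...

    create(0)    # still accepted: int is implicitly compatible with float
    create(0.5)  # accepted as before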
2 changes: 1 addition & 1 deletion src/zarr/core/attributes.py
@@ -13,7 +13,7 @@


 class Attributes(MutableMapping[str, JSON]):
-    def __init__(self, obj: Array | Group):
+    def __init__(self, obj: Array | Group) -> None:
         # key=".zattrs", read_only=False, cache=True, synchronizer=None
         self._obj = obj
14 changes: 9 additions & 5 deletions src/zarr/core/buffer/core.py
@@ -93,7 +93,7 @@ def ravel(self, order: Literal["K", "A", "C", "F"] = ...) -> Self: ...

     def all(self) -> bool: ...

-    def __eq__(self, other: Any) -> Self:  # type: ignore[explicit-override, override]
+    def __eq__(self, other: object) -> Self:  # type: ignore[explicit-override, override]
         """Element-wise equal

         Notes

@@ -136,7 +136,7 @@ class Buffer(ABC):
         array-like object that must be 1-dim, contiguous, and byte dtype.
     """

-    def __init__(self, array_like: ArrayLike):
+    def __init__(self, array_like: ArrayLike) -> None:
         if array_like.ndim != 1:
             raise ValueError("array_like: only 1-dim allowed")
         if array_like.dtype != np.dtype("b"):

@@ -313,7 +313,7 @@ class NDBuffer:
         ndarray-like object that is convertible to a regular Numpy array.
     """

-    def __init__(self, array: NDArrayLike):
+    def __init__(self, array: NDArrayLike) -> None:
         # assert array.ndim > 0
         assert array.dtype != object
         self._data = array

@@ -464,10 +464,14 @@ def __repr__(self) -> str:

     def all_equal(self, other: Any, equal_nan: bool = True) -> bool:
         """Compare to `other` using np.array_equal."""
+        if other is None:
+            # Handle None fill_value for Zarr V2
+            return False
         # use array_equal to obtain equal_nan=True functionality
         data, other = np.broadcast_arrays(self._data, other)
-        result = np.array_equal(self._data, other, equal_nan=equal_nan)
-        return result
+        return np.array_equal(
+            self._data, other, equal_nan=equal_nan if self._data.dtype.kind not in "US" else False
+        )
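The reworked `all_equal` guards two edge cases: `other is None` (Zarr v2's marker for "no fill value") can never match stored data, and `equal_nan=True` is only valid for dtypes that can hold NaN, since `np.array_equal` implements it via `np.isnan`, which rejects string dtypes (kind "U" or "S"). A short demonstration of the failure the dtype guard avoids:

    import numpy as np

    a = np.array(["x", "y"])

    print(np.array_equal(a, a))  # True
    try:
        np.array_equal(a, a, equal_nan=True)
    except TypeError:
        # np.isnan is undefined for string arrays, hence the dtype.kind check.
        print("equal_nan requires a dtype that can represent NaN")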
4 changes: 2 additions & 2 deletions src/zarr/core/buffer/cpu.py
@@ -45,7 +45,7 @@ class Buffer(core.Buffer):
         array-like object that must be 1-dim, contiguous, and byte dtype.
     """

-    def __init__(self, array_like: ArrayLike):
+    def __init__(self, array_like: ArrayLike) -> None:
         super().__init__(array_like)

     @classmethod

@@ -143,7 +143,7 @@ class NDBuffer(core.NDBuffer):
         ndarray-like object that is convertible to a regular Numpy array.
     """

-    def __init__(self, array: NDArrayLike):
+    def __init__(self, array: NDArrayLike) -> None:
         super().__init__(array)

     @classmethod
4 changes: 2 additions & 2 deletions src/zarr/core/buffer/gpu.py
@@ -48,7 +48,7 @@ class Buffer(core.Buffer):
         array-like object that must be 1-dim, contiguous, and byte dtype.
     """

-    def __init__(self, array_like: ArrayLike):
+    def __init__(self, array_like: ArrayLike) -> None:
         if cp is None:
             raise ImportError(
                 "Cannot use zarr.buffer.gpu.Buffer without cupy. Please install cupy."

@@ -137,7 +137,7 @@ class NDBuffer(core.NDBuffer):
         ndarray-like object that is convertible to a regular Numpy array.
     """

-    def __init__(self, array: NDArrayLike):
+    def __init__(self, array: NDArrayLike) -> None:
         if cp is None:
             raise ImportError(
                 "Cannot use zarr.buffer.gpu.NDBuffer without cupy. Please install cupy."