diff --git a/.github/workflows/hypothesis.yaml b/.github/workflows/hypothesis.yaml new file mode 100644 index 000000000..c5a239c27 --- /dev/null +++ b/.github/workflows/hypothesis.yaml @@ -0,0 +1,84 @@ +name: Slow Hypothesis CI +on: + push: + branches: + - "main" + - "v3" + pull_request: + branches: + - "main" + - "v3" + types: [opened, reopened, synchronize, labeled] + schedule: + - cron: "0 0 * * *" # Daily “At 00:00” UTC + workflow_dispatch: # allows you to trigger manually + +env: + FORCE_COLOR: 3 + +jobs: + + hypothesis: + name: Slow Hypothesis Tests + runs-on: "ubuntu-latest" + defaults: + run: + shell: bash -l {0} + + strategy: + matrix: + python-version: ['3.11'] + numpy-version: ['1.26'] + dependency-set: ["optional"] + + steps: + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + cache: 'pip' + - name: Install Hatch + run: | + python -m pip install --upgrade pip + pip install hatch + - name: Set Up Hatch Env + run: | + hatch env create test.py${{ matrix.python-version }}-${{ matrix.numpy-version }}-${{ matrix.dependency-set }} + hatch env run -e test.py${{ matrix.python-version }}-${{ matrix.numpy-version }}-${{ matrix.dependency-set }} list-env + # https://github.com/actions/cache/blob/main/tips-and-workarounds.md#update-a-cache + - name: Restore cached hypothesis directory + id: restore-hypothesis-cache + uses: actions/cache/restore@v4 + with: + path: .hypothesis/ + key: cache-hypothesis-${{ runner.os }}-${{ github.run_id }} + restore-keys: | + cache-hypothesis- + + - name: Run slow Hypothesis tests + if: success() + id: status + run: | + hatch env run --env test.py${{ matrix.python-version }}-${{ matrix.numpy-version }}-${{ matrix.dependency-set }} run-hypothesis + + # explicitly save the cache so it gets updated, also do this even if it fails. 
+ - name: Save cached hypothesis directory + id: save-hypothesis-cache + if: always() && steps.status.outcome != 'skipped' + uses: actions/cache/save@v4 + with: + path: .hypothesis/ + key: cache-hypothesis-${{ runner.os }}-${{ github.run_id }} + + - name: Generate and publish the report + if: | + failure() + && steps.status.outcome == 'failure' + && github.event_name == 'schedule' + && github.repository_owner == 'zarr-developers' + uses: xarray-contrib/issue-from-pytest-log@v1 + with: + log-path: output-${{ matrix.python-version }}-log.jsonl + issue-title: "Nightly Hypothesis tests failed" + issue-label: "topic-hypothesis" diff --git a/.gitignore b/.gitignore index 35957f2c9..84bcb00ff 100644 --- a/.gitignore +++ b/.gitignore @@ -78,5 +78,8 @@ src/zarr/_version.py #test_sync* data/* src/fixture/ +fixture/ .DS_Store +tests/.hypothesis +.hypothesis/ diff --git a/pyproject.toml b/pyproject.toml index f1be6725b..93116bede 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -120,7 +120,8 @@ extra-dependencies = [ "flask-cors", "flask", "requests", - "mypy" + "mypy", + "hypothesis" ] features = ["extra"] @@ -139,6 +140,7 @@ run-coverage = "pytest --cov-config=pyproject.toml --cov=pkg --cov=tests" run = "run-coverage --no-cov" run-verbose = "run-coverage --verbose" run-mypy = "mypy src" +run-hypothesis = "pytest --hypothesis-profile ci tests/v3/test_properties.py" list-env = "pip list" [tool.hatch.envs.docs] diff --git a/src/zarr/strategies.py b/src/zarr/strategies.py new file mode 100644 index 000000000..91a8542ce --- /dev/null +++ b/src/zarr/strategies.py @@ -0,0 +1,145 @@ +from typing import Any + +import hypothesis.extra.numpy as npst +import hypothesis.strategies as st +import numpy as np +from hypothesis import given, settings # noqa + +from .array import Array +from .group import Group +from .store import MemoryStore, StoreLike + +# Copied from Xarray +_attr_keys = st.text(st.characters(), min_size=1) +_attr_values = st.recursive( + st.none() | st.booleans() | 
@st.composite  # type: ignore[misc]
def np_array_and_chunks(
    draw: st.DrawFn, *, arrays: st.SearchStrategy[np.ndarray] = np_arrays
) -> tuple[np.ndarray, tuple[int, ...]]:  # type: ignore[type-arg]
    """Draw a random array together with a chunk shape for it.

    Parameters
    ----------
    draw
        Draw function supplied by ``st.composite``.
    arrays
        Strategy producing the NumPy array. Defaults to ``np_arrays``.

    Returns
    -------
    tuple
        ``(array, chunks)`` where ``chunks`` holds one chunk length per
        dimension of ``array``.
    """
    array = draw(arrays)
    # We want this strategy to shrink towards arrays with a smaller number of chunks.
    # st.integers() shrinks towards smaller values, so draw the *number of chunks*
    # per dimension rather than the chunk length.
    # ``max(size, 1)`` keeps the integer bounds valid for a zero-length dimension.
    numchunks = draw(
        st.tuples(*[st.integers(min_value=1, max_value=max(size, 1)) for size in array.shape])
    )
    # Convert the chunk counts to chunk lengths. Floor division means the realised
    # number of chunks can be larger than the drawn count (size=5, nchunks=2 -> length 2).
    # The result is clamped to 1 so a zero-length dimension never yields a 0 chunk length.
    chunks = tuple(
        max(size // nchunks, 1) for size, nchunks in zip(array.shape, numchunks, strict=True)
    )
    return (array, chunks)
@st.composite  # type: ignore[misc]
def arrays(
    draw: st.DrawFn,
    *,
    compressors: st.SearchStrategy = compressors,
    stores: st.SearchStrategy[StoreLike] = stores,
    arrays: st.SearchStrategy[np.ndarray] = np_arrays,
    paths: st.SearchStrategy[None | str] = paths,
    array_names: st.SearchStrategy = array_names,
    attrs: st.SearchStrategy = attrs,
) -> Array:
    """Draw a Zarr array that has been created in a store and filled with data.

    Parameters
    ----------
    draw
        Draw function supplied by ``st.composite``.
    compressors
        Currently unused; accepted so callers can already pass it (see TODO below).
    stores
        Strategy producing the store the root group is created in.
    arrays
        Strategy producing the NumPy array whose shape, dtype and values are used.
    paths
        Strategy producing the parent path of the array. ``None`` places the
        array directly under the root group.
    array_names
        Strategy producing the final path component of the array.
    attrs
        Strategy producing the attributes mapping (or ``None``).

    Returns
    -------
    Array
        The created array, already populated with the drawn NumPy data.
    """
    # Keep the draw order stable: Hypothesis replays examples by draw sequence.
    store = draw(stores)
    nparray, chunks = draw(np_array_and_chunks(arrays=arrays))
    path = draw(paths)
    name = draw(array_names)
    attributes = draw(attrs)
    # TODO: draw from `compressors` and pass it to `create_array` once that is supported.

    expected_attrs = {} if attributes is None else attributes

    # Join parent path and name with exactly one "/" between them.
    # `paths` is annotated as possibly yielding None, so handle that instead of
    # failing on `None.endswith`.
    if path is None:
        array_path = name
    elif path.endswith("/"):
        array_path = path + name
    else:
        array_path = f"{path}/{name}"

    root = Group.create(store)

    # A datetime64 scalar is built from an integer plus a unit, so datetime
    # dtypes get "ns" as an extra constructor argument for the fill value.
    fill_value_args: tuple[Any, ...] = ("ns",) if nparray.dtype.kind == "M" else ()

    a = root.create_array(
        array_path,
        shape=nparray.shape,
        chunks=chunks,
        dtype=nparray.dtype.str,
        attributes=attributes,
        fill_value=nparray.dtype.type(0, *fill_value_args),
    )

    # Sanity-check the freshly created array before handing it to a test.
    assert isinstance(a, Array)
    assert nparray.shape == a.shape
    assert chunks == a.chunks
    assert array_path == a.path, (path, name, array_path, a.name, a.path)
    # TODO: also check `a.name`, `a.basename` and `a.store`.
    assert dict(a.attrs) == expected_attrs

    a[:] = nparray

    return a
def is_negative_slice(idx: Any) -> bool:
    """Return True if ``idx`` is a slice with an explicit negative step."""
    return isinstance(idx, slice) and idx.step is not None and idx.step < 0


@st.composite  # type: ignore[misc]
def basic_indices(draw: st.DrawFn, *, shape: tuple[int, ...], **kwargs: Any) -> Any:
    """Draw a basic index for ``shape`` that contains no negative-step slice.

    Parameters
    ----------
    draw
        Draw function supplied by ``st.composite``.
    shape
        Shape of the array to be indexed, one entry per dimension.
    **kwargs
        Forwarded unchanged to ``hypothesis.extra.numpy.basic_indices``.

    Returns
    -------
    Any
        An indexer produced by ``npst.basic_indices``; candidates containing a
        negative-step slice are filtered out because they are unsupported.
    """

    def has_negative_slice(idxr: Any) -> bool:
        # An indexer is either a single item or a tuple of items; treat both alike.
        members = idxr if isinstance(idxr, tuple) else (idxr,)
        return any(is_negative_slice(member) for member in members)

    return draw(
        npst.basic_indices(shape=shape, **kwargs).filter(lambda idxr: not has_negative_slice(idxr))
    )
@given(data=st.data())
def test_roundtrip(data):
    """Writing a NumPy array into a Zarr array and reading it back is lossless."""
    nparray = data.draw(np_arrays)
    # Pin the array strategy so the Zarr array is built from exactly `nparray`.
    zarray = data.draw(arrays(arrays=st.just(nparray)))
    assert_array_equal(nparray, zarray[:])


@given(data=st.data())
def test_basic_indexing(data):
    """Basic indexing reads and writes agree between Zarr and NumPy."""
    zarray = data.draw(arrays())
    nparray = zarray[:]
    indexer = data.draw(basic_indices(shape=nparray.shape))

    # Read path: the same indexer must select the same values.
    actual = zarray[indexer]
    assert_array_equal(nparray[indexer], actual)

    # Write path: apply the same assignment to both and compare the full arrays.
    new_data = np.ones_like(actual)
    zarray[indexer] = new_data
    nparray[indexer] = new_data
    assert_array_equal(nparray, zarray[:])


@given(data=st.data())
def test_vindex(data):
    """Integer-array indexing through ``vindex`` matches NumPy indexing."""
    zarray = data.draw(arrays())
    nparray = zarray[:]

    indexer = data.draw(
        npst.integer_array_indices(
            shape=nparray.shape, result_shape=npst.array_shapes(max_dims=None)
        )
    )
    actual = zarray.vindex[indexer]
    assert_array_equal(nparray[indexer], actual)
@given(st.data()) +# def test_roundtrip_object_array(data): +# nparray = data.draw(np_arrays) +# zarray = data.draw(arrays(arrays=st.just(nparray))) +# assert_array_equal(nparray, zarray[:])