Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add hypothesis property tests #1746

Merged
merged 26 commits into from
Aug 8, 2024
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 84 additions & 0 deletions .github/workflows/hypothesis.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
name: Slow Hypothesis CI
on:
  push:
    branches:
      - "main"
      - "v3"
  pull_request:
    branches:
      - "main"
      - "v3"
    types: [opened, reopened, synchronize, labeled]
  schedule:
    - cron: "0 0 * * *" # Daily "At 00:00" UTC
  workflow_dispatch: # allows you to trigger manually

env:
  FORCE_COLOR: 3

jobs:
  hypothesis:
    name: Slow Hypothesis Tests
    runs-on: "ubuntu-latest"
    defaults:
      run:
        shell: bash -l {0}

    strategy:
      matrix:
        python-version: ['3.11']
        numpy-version: ['1.26']
        dependency-set: ["optional"]

    steps:
      - uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
          cache: 'pip'
      - name: Install Hatch
        run: |
          python -m pip install --upgrade pip
          pip install hatch
      - name: Set Up Hatch Env
        run: |
          hatch env create test.py${{ matrix.python-version }}-${{ matrix.numpy-version }}-${{ matrix.dependency-set }}
          hatch env run -e test.py${{ matrix.python-version }}-${{ matrix.numpy-version }}-${{ matrix.dependency-set }} list-env
      # https://github.com/actions/cache/blob/main/tips-and-workarounds.md#update-a-cache
      - name: Restore cached hypothesis directory
        id: restore-hypothesis-cache
        uses: actions/cache/restore@v4
        with:
          path: .hypothesis/
          key: cache-hypothesis-${{ runner.os }}-${{ github.run_id }}
          restore-keys: |
            cache-hypothesis-

      - name: Run slow Hypothesis tests
        if: success()
        id: status
        run: |
          hatch env run --env test.py${{ matrix.python-version }}-${{ matrix.numpy-version }}-${{ matrix.dependency-set }} run-hypothesis

      # explicitly save the cache so it gets updated, also do this even if it fails.
      - name: Save cached hypothesis directory
        id: save-hypothesis-cache
        if: always() && steps.status.outcome != 'skipped'
        uses: actions/cache/save@v4
        with:
          path: .hypothesis/
          key: cache-hypothesis-${{ runner.os }}-${{ github.run_id }}

      - name: Generate and publish the report
        if: |
          failure()
          && steps.status.outcome == 'failure'
          && github.event_name == 'schedule'
          && github.repository_owner == 'zarr-developers'
        uses: xarray-contrib/issue-from-pytest-log@v1
        with:
          # NOTE(review): the run-hypothesis hatch script does not visibly pass
          # pytest's --report-log flag, so this file may never be produced —
          # confirm against the hatch scripts in pyproject.toml.
          log-path: output-${{ matrix.python-version }}-log.jsonl
          issue-title: "Nightly Hypothesis tests failed"
          issue-label: "topic-hypothesis"
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -81,3 +81,5 @@ data/*
src/fixture/

.DS_Store
tests/.hypothesis
.hypothesis/
4 changes: 3 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,8 @@ extra-dependencies = [
"msgpack",
"lmdb",
"pytest-asyncio",
"mypy"
"mypy",
"hypothesis",
]
features = ["extra"]

Expand All @@ -131,6 +132,7 @@ run-coverage = "pytest --cov-config=pyproject.toml --cov=pkg --cov=tests"
run = "run-coverage --no-cov"
run-verbose = "run-coverage --verbose"
run-mypy = "mypy src"
run-hypothesis = "pytest --hypothesis-show-statistics tests/test_properties.py"
list-env = "pip list"

[tool.hatch.envs.docs]
Expand Down
139 changes: 139 additions & 0 deletions src/zarr/strategies.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
from typing import Any

import hypothesis.extra.numpy as npst
import hypothesis.strategies as st
import numpy as np
from hypothesis import given, settings  # noqa

from .array import Array
from .group import Group
from .store import MemoryStore, StoreLike

# Attribute key/value strategies, copied from Xarray.
_attr_keys = st.text(st.characters(), min_size=1)
_attr_values = st.recursive(
    st.none() | st.booleans() | st.text(st.characters(), max_size=5),
    lambda children: st.lists(children) | st.dictionaries(_attr_keys, children),
    max_leaves=3,
)

# Characters allowed in zarr keys.
# No '/' in array names?
# No '.' in paths?
zarr_key_chars = st.sampled_from("-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz")

# The following should be public strategies
attrs = st.none() | st.dictionaries(_attr_keys, _attr_values)
paths = st.none() | st.text(zarr_key_chars, min_size=1) | st.just("/")
# Names may contain '.', but must not begin with one.
# NOTE: the ".." entry in the startswith tuple is redundant — any string
# starting with ".." also starts with "." — kept for explicitness.
array_names = st.text(zarr_key_chars | st.just("."), min_size=1).filter(
    lambda t: not t.startswith((".", ".."))
)
np_arrays = npst.arrays(
    # FIXME: re-enable timedeltas once we figure out the fill_value issue.
    dtype=npst.scalar_dtypes().filter(lambda x: x.kind != "m"),
    shape=npst.array_shapes(max_dims=4),
)
stores = st.builds(MemoryStore, st.just({}), mode=st.just("w"))
compressors = st.sampled_from([None, "default"])


@st.composite  # type: ignore[misc]
def np_array_and_chunks(
    draw: st.DrawFn, *, arrays: st.SearchStrategy[np.ndarray] = np_arrays
) -> tuple[np.ndarray, tuple[int]]:
    """A hypothesis strategy to generate small sized random arrays.

    Returns: a tuple of the array and a suitable random chunking for it.
    """
    arr = draw(arrays)
    # Shrink toward arrays with fewer chunks: st.integers() shrinks toward
    # min_value, so the drawn per-dimension chunk counts shrink toward 1.
    nchunks_per_dim = draw(
        st.tuples(*(st.integers(min_value=1, max_value=dim) for dim in arr.shape))
    )
    # Convert chunk counts into a concrete chunk shape.
    chunk_shape = tuple(
        dim // count for dim, count in zip(arr.shape, nchunks_per_dim, strict=True)
    )
    return (arr, chunk_shape)


@st.composite  # type: ignore[misc]
def arrays(
    draw: st.DrawFn,
    *,
    compressors: st.SearchStrategy = compressors,
    stores: st.SearchStrategy[StoreLike] = stores,
    arrays: st.SearchStrategy[np.ndarray] = np_arrays,
    paths: st.SearchStrategy[None | str] = paths,
    array_names: st.SearchStrategy = array_names,
    attrs: st.SearchStrategy = attrs,
) -> Array:
    """A hypothesis strategy that creates a zarr Array in a fresh store.

    Draws a numpy array plus a chunking, a group path, an array name, and
    attributes; writes the data into the created array and sanity-checks the
    result before returning it.
    """
    store = draw(stores)
    nparray, chunks = draw(np_array_and_chunks(arrays=arrays))
    path = draw(paths)
    name = draw(array_names)
    attributes = draw(attrs)
    # compressor = draw(compressors)

    # TODO: clean this up
    # Derive the expected store path (no leading '/') and hierarchy name
    # (with leading '/') for every combination of drawn path and name.
    if path is None and name is None:
        array_path = None
        array_name = None
    elif path is None and name is not None:
        array_path = f"{name}"
        array_name = f"/{name}"
    elif path is not None and name is None:
        array_path = path
        array_name = None
    elif path == "/":
        # The root path contributes no prefix to the store path.
        assert name is not None
        array_path = name
        array_name = "/" + name
    else:
        assert name is not None
        array_path = f"{path}/{name}"
        array_name = "/" + array_path

    # attrs drawn as None should round-trip as an empty mapping.
    expected_attrs = {} if attributes is None else attributes

    root = Group.create(store)
    a = root.create_array(
        array_path,
        shape=nparray.shape,
        chunks=chunks,
        dtype=nparray.dtype.str,
        attributes=attributes,
        # compressor=compressor, # TODO: FIXME
        # TODO: FIXME seems to break with booleans and timedelta
        # fill_value=nparray.dtype.type(0),
    )

    # Sanity-check the created array before handing it to the test.
    assert isinstance(a, Array)
    assert nparray.shape == a.shape
    # assert chunks == a.chunks # TODO: adapt for v2, v3
    assert array_path == a.path
    assert array_name == a.name
    # assert a.basename is None # TODO
    # assert a.store == normalize_store_arg(store)
    assert dict(a.attrs) == expected_attrs

    a[:] = nparray

    store.close()

    return a


def is_negative_slice(idx: Any) -> bool:
    """Return True iff *idx* is a ``slice`` with an explicit negative step."""
    if not isinstance(idx, slice):
        return False
    step = idx.step
    return step is not None and step < 0


@st.composite  # type: ignore[misc]
def basic_indices(draw: st.DrawFn, *, shape: tuple[int]):
    """Basic indices without unsupported negative slices."""

    def _is_supported(idxr) -> bool:
        # Reject a bare negative slice, or any tuple that contains one.
        if is_negative_slice(idxr):
            return False
        if isinstance(idxr, tuple):
            return not any(is_negative_slice(idx) for idx in idxr)
        return True

    return draw(npst.basic_indices(shape=shape).filter(_is_supported))
48 changes: 48 additions & 0 deletions tests/test_properties.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import numpy as np
import pytest
from numpy.testing import assert_array_equal

pytest.importorskip("hypothesis")

import hypothesis.extra.numpy as npst # noqa
import hypothesis.strategies as st # noqa
from hypothesis import given, settings # noqa
from zarr.strategies import arrays, np_arrays, basic_indices # noqa


# @pytest.mark.slow
@settings(max_examples=300)
@given(st.data())
def test_roundtrip(data):
    # Writing a numpy array into a zarr array and reading it back must
    # reproduce the original values exactly.
    expected = data.draw(np_arrays)
    zarray = data.draw(arrays(arrays=st.just(expected)))
    assert_array_equal(expected, zarray[:])


@given(st.data())
def test_roundtrip_object_array(data):
    # NOTE(review): this body is byte-identical to test_roundtrip above —
    # presumably it was meant to draw object-dtype arrays specifically.
    # TODO: restrict the strategy to object dtype, or remove the duplicate.
    nparray = data.draw(np_arrays)
    zarray = data.draw(arrays(arrays=st.just(nparray)))
    assert_array_equal(nparray, zarray[:])


# @pytest.mark.slow
@settings(max_examples=500)
@given(data=st.data())
def test_basic_indexing(data):
    # Reads and writes through a drawn basic index must match numpy exactly.
    zarray = data.draw(arrays())
    reference = zarray[:]
    indexer = data.draw(basic_indices(shape=reference.shape))

    # Read path: selecting through zarr agrees with selecting through numpy.
    selection = zarray[indexer]
    assert_array_equal(reference[indexer], selection)

    # Write path: assign through the same index on both sides, then compare
    # the full arrays.
    fill = np.ones_like(selection)
    zarray[indexer] = fill
    reference[indexer] = fill
    assert_array_equal(reference, zarray[:])


@settings(max_examples=500)
@given(data=st.data())
def test_advanced_indexing(data):
    # TODO: placeholder — no advanced-indexing properties are checked yet,
    # so this test passes trivially for every drawn example.
    pass