Skip to content

Commit

Permalink
Add hypothesis property tests (#1746)
Browse files Browse the repository at this point in the history
* Add hypothesis tests

1. Roundtrip a numpy array
2. Basic Indexing

* Add compressors

This is important for #1931

* Add more test

* Add zarr_version

* Revert "Add zarr_version"

This reverts commit 2ad1b35.

* ADapt for V3

* Add workflow

* Try again

* always run

* fix env

* Try typing

* Cleanup

* Add vindex

* Review feedback

* cleanup

* WIP

* Cleanup

* Move to v3/

* another type ignore

* Add `_`

* Update src/zarr/strategies.py

* Update src/zarr/strategies.py

* style: pre-commit fixes

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
dcherian and pre-commit-ci[bot] authored Aug 8, 2024
1 parent 064b2e0 commit 334d6fe
Show file tree
Hide file tree
Showing 6 changed files with 319 additions and 1 deletion.
84 changes: 84 additions & 0 deletions .github/workflows/hypothesis.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
name: Slow Hypothesis CI
on:
push:
branches:
- "main"
- "v3"
pull_request:
branches:
- "main"
- "v3"
types: [opened, reopened, synchronize, labeled]
schedule:
- cron: "0 0 * * *" # Daily “At 00:00” UTC
workflow_dispatch: # allows you to trigger manually

env:
FORCE_COLOR: 3

jobs:

hypothesis:
name: Slow Hypothesis Tests
runs-on: "ubuntu-latest"
defaults:
run:
shell: bash -l {0}

strategy:
matrix:
python-version: ['3.11']
numpy-version: ['1.26']
dependency-set: ["optional"]

steps:
- uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
cache: 'pip'
- name: Install Hatch
run: |
python -m pip install --upgrade pip
pip install hatch
- name: Set Up Hatch Env
run: |
hatch env create test.py${{ matrix.python-version }}-${{ matrix.numpy-version }}-${{ matrix.dependency-set }}
hatch env run -e test.py${{ matrix.python-version }}-${{ matrix.numpy-version }}-${{ matrix.dependency-set }} list-env
# https://github.com/actions/cache/blob/main/tips-and-workarounds.md#update-a-cache
- name: Restore cached hypothesis directory
id: restore-hypothesis-cache
uses: actions/cache/restore@v4
with:
path: .hypothesis/
key: cache-hypothesis-${{ runner.os }}-${{ github.run_id }}
restore-keys: |
cache-hypothesis-
- name: Run slow Hypothesis tests
if: success()
id: status
run: |
hatch env run --env test.py${{ matrix.python-version }}-${{ matrix.numpy-version }}-${{ matrix.dependency-set }} run-hypothesis
# explicitly save the cache so it gets updated, also do this even if it fails.
- name: Save cached hypothesis directory
id: save-hypothesis-cache
if: always() && steps.status.outcome != 'skipped'
uses: actions/cache/save@v4
with:
path: .hypothesis/
key: cache-hypothesis-${{ runner.os }}-${{ github.run_id }}

- name: Generate and publish the report
if: |
failure()
&& steps.status.outcome == 'failure'
&& github.event_name == 'schedule'
&& github.repository_owner == 'zarr-developers'
uses: xarray-contrib/issue-from-pytest-log@v1
with:
log-path: output-${{ matrix.python-version }}-log.jsonl
issue-title: "Nightly Hypothesis tests failed"
issue-label: "topic-hypothesis"
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -78,5 +78,8 @@ src/zarr/_version.py
#test_sync*
data/*
src/fixture/
fixture/

.DS_Store
tests/.hypothesis
.hypothesis/
4 changes: 3 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,8 @@ extra-dependencies = [
"flask-cors",
"flask",
"requests",
"mypy"
"mypy",
"hypothesis"
]
features = ["extra"]

Expand All @@ -139,6 +140,7 @@ run-coverage = "pytest --cov-config=pyproject.toml --cov=pkg --cov=tests"
run = "run-coverage --no-cov"
run-verbose = "run-coverage --verbose"
run-mypy = "mypy src"
run-hypothesis = "pytest --hypothesis-profile ci tests/v3/test_properties.py"
list-env = "pip list"

[tool.hatch.envs.docs]
Expand Down
145 changes: 145 additions & 0 deletions src/zarr/strategies.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
from typing import Any

import hypothesis.extra.numpy as npst
import hypothesis.strategies as st
import numpy as np
from hypothesis import given, settings # noqa

from .array import Array
from .group import Group
from .store import MemoryStore, StoreLike

# Copied from Xarray
_attr_keys = st.text(st.characters(), min_size=1)
_attr_values = st.recursive(
st.none() | st.booleans() | st.text(st.characters(), max_size=5),
lambda children: st.lists(children) | st.dictionaries(_attr_keys, children),
max_leaves=3,
)

# From https://zarr-specs.readthedocs.io/en/latest/v3/core/v3.0.html#node-names
# 1. must not be the empty string ("")
# 2. must not include the character "/"
# 3. must not be a string composed only of period characters, e.g. "." or ".."
# 4. must not start with the reserved prefix "__"
zarr_key_chars = st.sampled_from(
".-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz"
)
node_names = st.text(zarr_key_chars, min_size=1).filter(
lambda t: t not in (".", "..") and not t.startswith("__")
)
array_names = node_names
attrs = st.none() | st.dictionaries(_attr_keys, _attr_values)
paths = st.lists(node_names, min_size=1).map(lambda x: "/".join(x)) | st.just("/")
np_arrays = npst.arrays(
# TODO: re-enable timedeltas once they are supported
dtype=npst.scalar_dtypes().filter(lambda x: x.kind != "m"),
shape=npst.array_shapes(max_dims=4),
)
stores = st.builds(MemoryStore, st.just({}), mode=st.just("w"))
compressors = st.sampled_from([None, "default"])


@st.composite # type: ignore[misc]
def np_array_and_chunks(
draw: st.DrawFn, *, arrays: st.SearchStrategy[np.ndarray] = np_arrays
) -> tuple[np.ndarray, tuple[int]]: # type: ignore[type-arg]
"""A hypothesis strategy to generate small sized random arrays.
Returns: a tuple of the array and a suitable random chunking for it.
"""
array = draw(arrays)
# We want this strategy to shrink towards arrays with smaller number of chunks
# 1. st.integers() shrinks towards smaller values. So we use that to generate number of chunks
numchunks = draw(st.tuples(*[st.integers(min_value=1, max_value=size) for size in array.shape]))
# 2. and now generate the chunks tuple
chunks = tuple(size // nchunks for size, nchunks in zip(array.shape, numchunks, strict=True))
return (array, chunks)


@st.composite # type: ignore[misc]
def arrays(
draw: st.DrawFn,
*,
compressors: st.SearchStrategy = compressors,
stores: st.SearchStrategy[StoreLike] = stores,
arrays: st.SearchStrategy[np.ndarray] = np_arrays,
paths: st.SearchStrategy[None | str] = paths,
array_names: st.SearchStrategy = array_names,
attrs: st.SearchStrategy = attrs,
) -> Array:
store = draw(stores)
nparray, chunks = draw(np_array_and_chunks(arrays=arrays))
path = draw(paths)
name = draw(array_names)
attributes = draw(attrs)
# compressor = draw(compressors)

# TODO: clean this up
# if path is None and name is None:
# array_path = None
# array_name = None
# elif path is None and name is not None:
# array_path = f"{name}"
# array_name = f"/{name}"
# elif path is not None and name is None:
# array_path = path
# array_name = None
# elif path == "/":
# assert name is not None
# array_path = name
# array_name = "/" + name
# else:
# assert name is not None
# array_path = f"{path}/{name}"
# array_name = "/" + array_path

expected_attrs = {} if attributes is None else attributes

array_path = path + ("/" if not path.endswith("/") else "") + name
root = Group.create(store)
fill_value_args: tuple[Any, ...] = tuple()
if nparray.dtype.kind == "M":
fill_value_args = ("ns",)

a = root.create_array(
array_path,
shape=nparray.shape,
chunks=chunks,
dtype=nparray.dtype.str,
attributes=attributes,
# compressor=compressor, # TODO: FIXME
fill_value=nparray.dtype.type(0, *fill_value_args),
)

assert isinstance(a, Array)
assert nparray.shape == a.shape
assert chunks == a.chunks
assert array_path == a.path, (path, name, array_path, a.name, a.path)
# assert array_path == a.name, (path, name, array_path, a.name, a.path)
# assert a.basename is None # TODO
# assert a.store == normalize_store_arg(store)
assert dict(a.attrs) == expected_attrs

a[:] = nparray

return a


def is_negative_slice(idx: Any) -> bool:
return isinstance(idx, slice) and idx.step is not None and idx.step < 0


@st.composite # type: ignore[misc]
def basic_indices(draw: st.DrawFn, *, shape: tuple[int], **kwargs): # type: ignore[no-untyped-def]
"""Basic indices without unsupported negative slices."""
return draw(
npst.basic_indices(shape=shape, **kwargs).filter(
lambda idxr: (
not (
is_negative_slice(idxr)
or (isinstance(idxr, tuple) and any(is_negative_slice(idx) for idx in idxr))
)
)
)
)
15 changes: 15 additions & 0 deletions tests/v3/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

import numpy as np
import pytest
from hypothesis import HealthCheck, Verbosity, settings

from zarr.store import LocalStore, MemoryStore, StorePath
from zarr.store.remote import RemoteStore
Expand Down Expand Up @@ -119,3 +120,17 @@ def array_fixture(request: pytest.FixtureRequest) -> np.ndarray:
.reshape(array_request.shape, order=array_request.order)
.astype(array_request.dtype)
)


settings.register_profile(
"ci",
max_examples=1000,
deadline=None,
suppress_health_check=[HealthCheck.filter_too_much, HealthCheck.too_slow],
)
settings.register_profile(
"local",
max_examples=300,
suppress_health_check=[HealthCheck.filter_too_much, HealthCheck.too_slow],
verbosity=Verbosity.verbose,
)
69 changes: 69 additions & 0 deletions tests/v3/test_properties.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
import numpy as np
import pytest
from numpy.testing import assert_array_equal

pytest.importorskip("hypothesis")

import hypothesis.extra.numpy as npst # noqa
import hypothesis.strategies as st # noqa
from hypothesis import given, settings # noqa
from zarr.strategies import arrays, np_arrays, basic_indices # noqa


@given(st.data())
def test_roundtrip(data):
nparray = data.draw(np_arrays)
zarray = data.draw(arrays(arrays=st.just(nparray)))
assert_array_equal(nparray, zarray[:])


@given(data=st.data())
def test_basic_indexing(data):
zarray = data.draw(arrays())
nparray = zarray[:]
indexer = data.draw(basic_indices(shape=nparray.shape))
actual = zarray[indexer]
assert_array_equal(nparray[indexer], actual)

new_data = np.ones_like(actual)
zarray[indexer] = new_data
nparray[indexer] = new_data
assert_array_equal(nparray, zarray[:])


@given(data=st.data())
def test_vindex(data):
zarray = data.draw(arrays())
nparray = zarray[:]

indexer = data.draw(
npst.integer_array_indices(
shape=nparray.shape, result_shape=npst.array_shapes(max_dims=None)
)
)
actual = zarray.vindex[indexer]
assert_array_equal(nparray[indexer], actual)


# @st.composite
# def advanced_indices(draw, *, shape):
# basic_idxr = draw(
# basic_indices(
# shape=shape, min_dims=len(shape), max_dims=len(shape), allow_ellipsis=False
# ).filter(lambda x: isinstance(x, tuple))
# )

# int_idxr = draw(
# npst.integer_array_indices(shape=shape, result_shape=npst.array_shapes(max_dims=1))
# )
# args = tuple(
# st.sampled_from((l, r)) for l, r in zip_longest(basic_idxr, int_idxr, fillvalue=slice(None))
# )
# return draw(st.tuples(*args))


# @given(st.data())
# def test_roundtrip_object_array(data):
# nparray = data.draw(np_arrays)
# zarray = data.draw(arrays(arrays=st.just(nparray)))
# assert_array_equal(nparray, zarray[:])

0 comments on commit 334d6fe

Please sign in to comment.