Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add hypothesis property tests #1746

Merged
merged 26 commits into from
Aug 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 84 additions & 0 deletions .github/workflows/hypothesis.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
name: Slow Hypothesis CI
on:
push:
branches:
- "main"
- "v3"
pull_request:
branches:
- "main"
- "v3"
types: [opened, reopened, synchronize, labeled]
schedule:
- cron: "0 0 * * *" # Daily “At 00:00” UTC
workflow_dispatch: # allows you to trigger manually

env:
FORCE_COLOR: 3

jobs:

hypothesis:
name: Slow Hypothesis Tests
runs-on: "ubuntu-latest"
defaults:
run:
shell: bash -l {0}

strategy:
matrix:
python-version: ['3.11']
numpy-version: ['1.26']
dependency-set: ["optional"]

steps:
- uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
cache: 'pip'
- name: Install Hatch
run: |
python -m pip install --upgrade pip
pip install hatch
- name: Set Up Hatch Env
run: |
hatch env create test.py${{ matrix.python-version }}-${{ matrix.numpy-version }}-${{ matrix.dependency-set }}
hatch env run -e test.py${{ matrix.python-version }}-${{ matrix.numpy-version }}-${{ matrix.dependency-set }} list-env
# https://github.com/actions/cache/blob/main/tips-and-workarounds.md#update-a-cache
- name: Restore cached hypothesis directory
id: restore-hypothesis-cache
uses: actions/cache/restore@v4
with:
path: .hypothesis/
key: cache-hypothesis-${{ runner.os }}-${{ github.run_id }}
restore-keys: |
cache-hypothesis-

- name: Run slow Hypothesis tests
if: success()
id: status
run: |
hatch env run --env test.py${{ matrix.python-version }}-${{ matrix.numpy-version }}-${{ matrix.dependency-set }} run-hypothesis

# explicitly save the cache so it gets updated, also do this even if it fails.
- name: Save cached hypothesis directory
id: save-hypothesis-cache
if: always() && steps.status.outcome != 'skipped'
uses: actions/cache/save@v4
with:
path: .hypothesis/
key: cache-hypothesis-${{ runner.os }}-${{ github.run_id }}

- name: Generate and publish the report
if: |
failure()
&& steps.status.outcome == 'failure'
&& github.event_name == 'schedule'
&& github.repository_owner == 'zarr-developers'
uses: xarray-contrib/issue-from-pytest-log@v1
with:
log-path: output-${{ matrix.python-version }}-log.jsonl
issue-title: "Nightly Hypothesis tests failed"
issue-label: "topic-hypothesis"
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -78,5 +78,8 @@ src/zarr/_version.py
#test_sync*
data/*
src/fixture/
fixture/

.DS_Store
tests/.hypothesis
.hypothesis/
4 changes: 3 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,8 @@ extra-dependencies = [
"flask-cors",
"flask",
"requests",
"mypy"
"mypy",
"hypothesis"
]
features = ["extra"]

Expand All @@ -139,6 +140,7 @@ run-coverage = "pytest --cov-config=pyproject.toml --cov=pkg --cov=tests"
run = "run-coverage --no-cov"
run-verbose = "run-coverage --verbose"
run-mypy = "mypy src"
run-hypothesis = "pytest --hypothesis-profile ci tests/v3/test_properties.py"
list-env = "pip list"

[tool.hatch.envs.docs]
Expand Down
145 changes: 145 additions & 0 deletions src/zarr/strategies.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
from typing import Any

import hypothesis.extra.numpy as npst
import hypothesis.strategies as st
import numpy as np
from hypothesis import given, settings # noqa

from .array import Array
from .group import Group
from .store import MemoryStore, StoreLike

# Copied from Xarray
_attr_keys = st.text(st.characters(), min_size=1)
_attr_values = st.recursive(
st.none() | st.booleans() | st.text(st.characters(), max_size=5),
lambda children: st.lists(children) | st.dictionaries(_attr_keys, children),
max_leaves=3,
)

# From https://zarr-specs.readthedocs.io/en/latest/v3/core/v3.0.html#node-names
# 1. must not be the empty string ("")
# 2. must not include the character "/"
# 3. must not be a string composed only of period characters, e.g. "." or ".."
# 4. must not start with the reserved prefix "__"
zarr_key_chars = st.sampled_from(
dcherian marked this conversation as resolved.
Show resolved Hide resolved
".-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz"
)
node_names = st.text(zarr_key_chars, min_size=1).filter(
lambda t: t not in (".", "..") and not t.startswith("__")
)
array_names = node_names
attrs = st.none() | st.dictionaries(_attr_keys, _attr_values)
paths = st.lists(node_names, min_size=1).map(lambda x: "/".join(x)) | st.just("/")
np_arrays = npst.arrays(
# TODO: re-enable timedeltas once they are supported
dtype=npst.scalar_dtypes().filter(lambda x: x.kind != "m"),
dcherian marked this conversation as resolved.
Show resolved Hide resolved
shape=npst.array_shapes(max_dims=4),
)
stores = st.builds(MemoryStore, st.just({}), mode=st.just("w"))
compressors = st.sampled_from([None, "default"])
dcherian marked this conversation as resolved.
Show resolved Hide resolved


@st.composite # type: ignore[misc]
def np_array_and_chunks(
draw: st.DrawFn, *, arrays: st.SearchStrategy[np.ndarray] = np_arrays
) -> tuple[np.ndarray, tuple[int]]: # type: ignore[type-arg]
"""A hypothesis strategy to generate small sized random arrays.

Returns: a tuple of the array and a suitable random chunking for it.
"""
array = draw(arrays)
# We want this strategy to shrink towards arrays with smaller number of chunks
# 1. st.integers() shrinks towards smaller values. So we use that to generate number of chunks
numchunks = draw(st.tuples(*[st.integers(min_value=1, max_value=size) for size in array.shape]))
# 2. and now generate the chunks tuple
chunks = tuple(size // nchunks for size, nchunks in zip(array.shape, numchunks, strict=True))
return (array, chunks)


@st.composite # type: ignore[misc]
def arrays(
draw: st.DrawFn,
*,
compressors: st.SearchStrategy = compressors,
stores: st.SearchStrategy[StoreLike] = stores,
arrays: st.SearchStrategy[np.ndarray] = np_arrays,
paths: st.SearchStrategy[None | str] = paths,
array_names: st.SearchStrategy = array_names,
attrs: st.SearchStrategy = attrs,
) -> Array:
store = draw(stores)
nparray, chunks = draw(np_array_and_chunks(arrays=arrays))
path = draw(paths)
name = draw(array_names)
attributes = draw(attrs)
# compressor = draw(compressors)

# TODO: clean this up
# if path is None and name is None:
# array_path = None
# array_name = None
# elif path is None and name is not None:
# array_path = f"{name}"
# array_name = f"/{name}"
# elif path is not None and name is None:
# array_path = path
# array_name = None
# elif path == "/":
# assert name is not None
# array_path = name
# array_name = "/" + name
# else:
# assert name is not None
# array_path = f"{path}/{name}"
# array_name = "/" + array_path

expected_attrs = {} if attributes is None else attributes

array_path = path + ("/" if not path.endswith("/") else "") + name
root = Group.create(store)
fill_value_args: tuple[Any, ...] = tuple()
if nparray.dtype.kind == "M":
fill_value_args = ("ns",)

a = root.create_array(
array_path,
shape=nparray.shape,
chunks=chunks,
dtype=nparray.dtype.str,
attributes=attributes,
# compressor=compressor, # TODO: FIXME
fill_value=nparray.dtype.type(0, *fill_value_args),
)

assert isinstance(a, Array)
assert nparray.shape == a.shape
assert chunks == a.chunks
assert array_path == a.path, (path, name, array_path, a.name, a.path)
# assert array_path == a.name, (path, name, array_path, a.name, a.path)
# assert a.basename is None # TODO
# assert a.store == normalize_store_arg(store)
assert dict(a.attrs) == expected_attrs

a[:] = nparray

return a


def is_negative_slice(idx: Any) -> bool:
return isinstance(idx, slice) and idx.step is not None and idx.step < 0


@st.composite # type: ignore[misc]
def basic_indices(draw: st.DrawFn, *, shape: tuple[int], **kwargs): # type: ignore[no-untyped-def]
"""Basic indices without unsupported negative slices."""
return draw(
npst.basic_indices(shape=shape, **kwargs).filter(
lambda idxr: (
not (
is_negative_slice(idxr)
or (isinstance(idxr, tuple) and any(is_negative_slice(idx) for idx in idxr))
)
)
)
)
15 changes: 15 additions & 0 deletions tests/v3/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

import numpy as np
import pytest
from hypothesis import HealthCheck, Verbosity, settings

from zarr.store import LocalStore, MemoryStore, StorePath
from zarr.store.remote import RemoteStore
Expand Down Expand Up @@ -111,3 +112,17 @@ def array_fixture(request: pytest.FixtureRequest) -> np.ndarray:
.reshape(array_request.shape, order=array_request.order)
.astype(array_request.dtype)
)


settings.register_profile(
"ci",
max_examples=1000,
deadline=None,
suppress_health_check=[HealthCheck.filter_too_much, HealthCheck.too_slow],
)
settings.register_profile(
"local",
max_examples=300,
suppress_health_check=[HealthCheck.filter_too_much, HealthCheck.too_slow],
verbosity=Verbosity.verbose,
)
69 changes: 69 additions & 0 deletions tests/v3/test_properties.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
import numpy as np
import pytest
from numpy.testing import assert_array_equal

pytest.importorskip("hypothesis")

import hypothesis.extra.numpy as npst # noqa
import hypothesis.strategies as st # noqa
from hypothesis import given, settings # noqa
from zarr.strategies import arrays, np_arrays, basic_indices # noqa


@given(st.data())
def test_roundtrip(data):
nparray = data.draw(np_arrays)
zarray = data.draw(arrays(arrays=st.just(nparray)))
assert_array_equal(nparray, zarray[:])


@given(data=st.data())
def test_basic_indexing(data):
zarray = data.draw(arrays())
nparray = zarray[:]
indexer = data.draw(basic_indices(shape=nparray.shape))
actual = zarray[indexer]
assert_array_equal(nparray[indexer], actual)

new_data = np.ones_like(actual)
zarray[indexer] = new_data
nparray[indexer] = new_data
assert_array_equal(nparray, zarray[:])


@given(data=st.data())
def test_vindex(data):
zarray = data.draw(arrays())
nparray = zarray[:]

indexer = data.draw(
npst.integer_array_indices(
shape=nparray.shape, result_shape=npst.array_shapes(max_dims=None)
)
)
actual = zarray.vindex[indexer]
assert_array_equal(nparray[indexer], actual)


# @st.composite
# def advanced_indices(draw, *, shape):
# basic_idxr = draw(
# basic_indices(
# shape=shape, min_dims=len(shape), max_dims=len(shape), allow_ellipsis=False
# ).filter(lambda x: isinstance(x, tuple))
# )

# int_idxr = draw(
# npst.integer_array_indices(shape=shape, result_shape=npst.array_shapes(max_dims=1))
# )
# args = tuple(
# st.sampled_from((l, r)) for l, r in zip_longest(basic_idxr, int_idxr, fillvalue=slice(None))
# )
# return draw(st.tuples(*args))


# @given(st.data())
# def test_roundtrip_object_array(data):
# nparray = data.draw(np_arrays)
# zarray = data.draw(arrays(arrays=st.just(nparray)))
# assert_array_equal(nparray, zarray[:])