Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add hypothesis property tests #1746

Merged
merged 26 commits into from
Aug 8, 2024
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 84 additions & 0 deletions .github/workflows/hypothesis.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
name: Slow Hypothesis CI
on:
  push:
    branches:
      - "main"
      - "v3"
  pull_request:
    branches:
      - "main"
      - "v3"
    types: [opened, reopened, synchronize, labeled]
  schedule:
    - cron: "0 0 * * *" # Daily "At 00:00" UTC
  workflow_dispatch: # allows you to trigger manually

env:
  FORCE_COLOR: 3

jobs:
  hypothesis:
    name: Slow Hypothesis Tests
    runs-on: "ubuntu-latest"
    defaults:
      run:
        shell: bash -l {0}

    strategy:
      matrix:
        python-version: ['3.11']
        numpy-version: ['1.26']
        dependency-set: ["optional"]

    steps:
      - uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
          cache: 'pip'
      - name: Install Hatch
        run: |
          python -m pip install --upgrade pip
          pip install hatch
      - name: Set Up Hatch Env
        run: |
          hatch env create test.py${{ matrix.python-version }}-${{ matrix.numpy-version }}-${{ matrix.dependency-set }}
          hatch env run -e test.py${{ matrix.python-version }}-${{ matrix.numpy-version }}-${{ matrix.dependency-set }} list-env
      # https://github.com/actions/cache/blob/main/tips-and-workarounds.md#update-a-cache
      - name: Restore cached hypothesis directory
        id: restore-hypothesis-cache
        uses: actions/cache/restore@v4
        with:
          path: .hypothesis/
          key: cache-hypothesis-${{ runner.os }}-${{ github.run_id }}
          restore-keys: |
            cache-hypothesis-

      - name: Run slow Hypothesis tests
        if: success()
        id: status
        run: |
          hatch env run --env test.py${{ matrix.python-version }}-${{ matrix.numpy-version }}-${{ matrix.dependency-set }} run-hypothesis

      # explicitly save the cache so it gets updated, also do this even if it fails.
      - name: Save cached hypothesis directory
        id: save-hypothesis-cache
        if: always() && steps.status.outcome != 'skipped'
        uses: actions/cache/save@v4
        with:
          path: .hypothesis/
          key: cache-hypothesis-${{ runner.os }}-${{ github.run_id }}

      - name: Generate and publish the report
        if: |
          failure()
          && steps.status.outcome == 'failure'
          && github.event_name == 'schedule'
          && github.repository_owner == 'zarr-developers'
        uses: xarray-contrib/issue-from-pytest-log@v1
        with:
          # NOTE(review): the run-hypothesis hatch script does not visibly pass
          # pytest's --report-log flag, so this file may never be produced —
          # confirm against the hatch scripts in pyproject.toml.
          log-path: output-${{ matrix.python-version }}-log.jsonl
          issue-title: "Nightly Hypothesis tests failed"
          issue-label: "topic-hypothesis"
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -81,3 +81,5 @@ data/*
src/fixture/

.DS_Store
tests/.hypothesis
.hypothesis/
4 changes: 3 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,8 @@ extra-dependencies = [
"msgpack",
"lmdb",
"pytest-asyncio",
"mypy"
"mypy",
"hypothesis",
]
features = ["extra"]

Expand All @@ -131,6 +132,7 @@ run-coverage = "pytest --cov-config=pyproject.toml --cov=pkg --cov=tests"
run = "run-coverage --no-cov"
run-verbose = "run-coverage --verbose"
run-mypy = "mypy src"
run-hypothesis = "pytest --hypothesis-show-statistics tests/test_properties.py"
list-env = "pip list"

[tool.hatch.envs.docs]
Expand Down
139 changes: 139 additions & 0 deletions src/zarr/strategies.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
from typing import Any

import hypothesis.extra.numpy as npst
import hypothesis.strategies as st
import numpy as np
from hypothesis import given, settings  # noqa

from .array import Array
from .group import Group
from .store import MemoryStore, StoreLike

# Attribute key/value strategies, copied from Xarray.
_attr_keys = st.text(st.characters(), min_size=1)
_attr_values = st.recursive(
    st.none() | st.booleans() | st.text(st.characters(), max_size=5),
    lambda children: st.lists(children) | st.dictionaries(_attr_keys, children),
    max_leaves=3,
)

# Characters allowed in zarr keys.
# No '/' in array names?
# No '.' in paths?
zarr_key_chars = st.sampled_from("-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz")

# The following should be public strategies
attrs = st.none() | st.dictionaries(_attr_keys, _attr_values)
paths = st.none() | st.text(zarr_key_chars, min_size=1) | st.just("/")
# Names may contain '.', but must not begin with one.
# NOTE: the ".." entry in the startswith tuple is redundant — any string
# starting with ".." also starts with "." — kept for explicitness.
array_names = st.text(zarr_key_chars | st.just("."), min_size=1).filter(
    lambda t: not t.startswith((".", ".."))
)
np_arrays = npst.arrays(
    # FIXME: re-enable timedeltas once we figure out the fill_value issue.
    dtype=npst.scalar_dtypes().filter(lambda x: x.kind != "m"),
    shape=npst.array_shapes(max_dims=4),
)
stores = st.builds(MemoryStore, st.just({}), mode=st.just("w"))
compressors = st.sampled_from([None, "default"])


@st.composite  # type: ignore[misc]
def np_array_and_chunks(
    draw: st.DrawFn, *, arrays: st.SearchStrategy[np.ndarray] = np_arrays
) -> tuple[np.ndarray, tuple[int]]:
    """A hypothesis strategy to generate small sized random arrays.

    Returns: a tuple of the array and a suitable random chunking for it.
    """
    arr = draw(arrays)
    # Shrink toward arrays with fewer chunks: st.integers() shrinks toward
    # min_value, so the drawn per-dimension chunk counts shrink toward 1.
    nchunks_per_dim = draw(
        st.tuples(*(st.integers(min_value=1, max_value=dim) for dim in arr.shape))
    )
    # Convert chunk counts into a concrete chunk shape.
    chunk_shape = tuple(
        dim // count for dim, count in zip(arr.shape, nchunks_per_dim, strict=True)
    )
    return (arr, chunk_shape)


@st.composite  # type: ignore[misc]
def arrays(
    draw: st.DrawFn,
    *,
    compressors: st.SearchStrategy = compressors,
    stores: st.SearchStrategy[StoreLike] = stores,
    arrays: st.SearchStrategy[np.ndarray] = np_arrays,
    paths: st.SearchStrategy[None | str] = paths,
    array_names: st.SearchStrategy = array_names,
    attrs: st.SearchStrategy = attrs,
) -> Array:
    """A hypothesis strategy that creates a zarr Array in a fresh store.

    Draws a numpy array plus a chunking, a group path, an array name, and
    attributes; writes the data into the created array and sanity-checks the
    result before returning it.
    """
    store = draw(stores)
    nparray, chunks = draw(np_array_and_chunks(arrays=arrays))
    path = draw(paths)
    name = draw(array_names)
    attributes = draw(attrs)
    # compressor = draw(compressors)

    # TODO: clean this up
    # Derive the expected store path (no leading '/') and hierarchy name
    # (with leading '/') for every combination of drawn path and name.
    if path is None and name is None:
        array_path = None
        array_name = None
    elif path is None and name is not None:
        array_path = f"{name}"
        array_name = f"/{name}"
    elif path is not None and name is None:
        array_path = path
        array_name = None
    elif path == "/":
        # The root path contributes no prefix to the store path.
        assert name is not None
        array_path = name
        array_name = "/" + name
    else:
        assert name is not None
        array_path = f"{path}/{name}"
        array_name = "/" + array_path

    # attrs drawn as None should round-trip as an empty mapping.
    expected_attrs = {} if attributes is None else attributes

    root = Group.create(store)
    a = root.create_array(
        array_path,
        shape=nparray.shape,
        chunks=chunks,
        dtype=nparray.dtype.str,
        attributes=attributes,
        # compressor=compressor, # TODO: FIXME
        # TODO: FIXME seems to break with booleans and timedelta
        # fill_value=nparray.dtype.type(0),
    )

    # Sanity-check the created array before handing it to the test.
    assert isinstance(a, Array)
    assert nparray.shape == a.shape
    # assert chunks == a.chunks # TODO: adapt for v2, v3
    assert array_path == a.path
    assert array_name == a.name
    # assert a.basename is None # TODO
    # assert a.store == normalize_store_arg(store)
    assert dict(a.attrs) == expected_attrs

    a[:] = nparray

    store.close()

    return a


def is_negative_slice(idx: Any) -> bool:
    """Return True iff *idx* is a ``slice`` with an explicit negative step."""
    if not isinstance(idx, slice):
        return False
    step = idx.step
    return step is not None and step < 0


@st.composite  # type: ignore[misc]
def basic_indices(draw: st.DrawFn, *, shape: tuple[int]):
    """Basic indices without unsupported negative slices."""

    def _is_supported(idxr) -> bool:
        # Reject a bare negative slice, or any tuple that contains one.
        if is_negative_slice(idxr):
            return False
        if isinstance(idxr, tuple):
            return not any(is_negative_slice(idx) for idx in idxr)
        return True

    return draw(npst.basic_indices(shape=shape).filter(_is_supported))
48 changes: 48 additions & 0 deletions tests/test_properties.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import numpy as np
import pytest
from numpy.testing import assert_array_equal

pytest.importorskip("hypothesis")

import hypothesis.extra.numpy as npst # noqa
import hypothesis.strategies as st # noqa
from hypothesis import given, settings # noqa
from zarr.strategies import arrays, np_arrays, basic_indices # noqa


# @pytest.mark.slow
@settings(max_examples=300)
@given(st.data())
def test_roundtrip(data):
    # Writing a numpy array into a zarr array and reading it back must
    # reproduce the original values exactly.
    expected = data.draw(np_arrays)
    zarray = data.draw(arrays(arrays=st.just(expected)))
    assert_array_equal(expected, zarray[:])


@given(st.data())
def test_roundtrip_object_array(data):
    # NOTE(review): this body is byte-identical to test_roundtrip above —
    # presumably it was meant to draw object-dtype arrays specifically.
    # TODO: restrict the strategy to object dtype, or remove the duplicate.
    nparray = data.draw(np_arrays)
    zarray = data.draw(arrays(arrays=st.just(nparray)))
    assert_array_equal(nparray, zarray[:])


# @pytest.mark.slow
@settings(max_examples=500)
@given(data=st.data())
def test_basic_indexing(data):
    # Reads and writes through a drawn basic index must match numpy exactly.
    zarray = data.draw(arrays())
    reference = zarray[:]
    indexer = data.draw(basic_indices(shape=reference.shape))

    # Read path: selecting through zarr agrees with selecting through numpy.
    selection = zarray[indexer]
    assert_array_equal(reference[indexer], selection)

    # Write path: assign through the same index on both sides, then compare
    # the full arrays.
    fill = np.ones_like(selection)
    zarray[indexer] = fill
    reference[indexer] = fill
    assert_array_equal(reference, zarray[:])


@settings(max_examples=500)
@given(data=st.data())
def test_advanced_indexing(data):
    # TODO: placeholder — no advanced-indexing properties are checked yet,
    # so this test passes trivially for every drawn example.
    pass