From 53f228a63ea57c8e60a6c633d720dbddb1787b46 Mon Sep 17 00:00:00 2001 From: Matthias Mohr Date: Thu, 14 Dec 2023 14:01:10 +0100 Subject: [PATCH] Test improvements --- assets/processes | 2 +- pyproject.toml | 1 + .../lib/process_runner/base.py | 13 +++- .../lib/process_runner/dask.py | 4 +- .../lib/process_runner/util.py | 22 ++++-- .../lib/process_runner/vito.py | 4 +- .../processes/processing/test_example.py | 69 ++++++++++++++----- 7 files changed, 84 insertions(+), 31 deletions(-) diff --git a/assets/processes b/assets/processes index 2cb3a61..3e72672 160000 --- a/assets/processes +++ b/assets/processes @@ -1 +1 @@ -Subproject commit 2cb3a61b46ebb9b9c956bedcc9eeec3529626149 +Subproject commit 3e726721873e2d6b6a862f3965408fefb1b13c27 diff --git a/pyproject.toml b/pyproject.toml index 0ea4494..69ee2aa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,6 +15,7 @@ dependencies = [ "xarray>=2023.11.0", "numpy>=1.26.2", "deepdiff>=6.7.1", + "python-dateutil>=2.8.2", ] classifiers = [ "Programming Language :: Python :: 3", diff --git a/src/openeo_test_suite/lib/process_runner/base.py b/src/openeo_test_suite/lib/process_runner/base.py index d87afcc..719b872 100644 --- a/src/openeo_test_suite/lib/process_runner/base.py +++ b/src/openeo_test_suite/lib/process_runner/base.py @@ -48,9 +48,18 @@ def encode_datacube(self, data: Dict) -> Any: """ raise Exception("datacubes not implemented yet") - def decode_data(self, data: Any) -> Any: + def encode_data(self, data: Any) -> Any: """ - Converts data from the internal backend representation to the process test/JSON5 representation + Converts data from the process test/JSON5 representation to the internal backend representation, + excluding datacubes and labeled arrays. + For example: JSON data types to numpy arrays. + openEO process tests specification -> backend + """ + return data + + def decode_data(self, data: Any, expected: Any) -> Any: + """ + Converts data from the internal backend representation to the process test/JSON5 representation. For example: numpy values to JSON data types, labeled-array or datacube to JSON object representation. backend -> openEO process tests specification diff --git a/src/openeo_test_suite/lib/process_runner/dask.py b/src/openeo_test_suite/lib/process_runner/dask.py index e9ce3dd..b508c34 100644 --- a/src/openeo_test_suite/lib/process_runner/dask.py +++ b/src/openeo_test_suite/lib/process_runner/dask.py @@ -61,7 +61,7 @@ def encode_process_graph( def encode_datacube(self, data): return datacube_to_xarray(data) - def decode_data(self, data): - data = numpy_to_native(data) + def decode_data(self, data, expected): + data = numpy_to_native(data, expected) data = xarray_to_datacube(data) return data diff --git a/src/openeo_test_suite/lib/process_runner/util.py b/src/openeo_test_suite/lib/process_runner/util.py index 19e9305..b2d8e8b 100644 --- a/src/openeo_test_suite/lib/process_runner/util.py +++ b/src/openeo_test_suite/lib/process_runner/util.py @@ -1,16 +1,22 @@ +from dateutil.parser import parse from datetime import datetime, timezone import numpy as np import xarray as xr -def numpy_to_native(data): +def numpy_to_native(data, expected): # Converting numpy dtypes to native python types if isinstance(data, np.ndarray) or isinstance(data, np.generic): - if data.size == 1: - return data.item() - elif data.size > 1: + if isinstance(expected, list): return data.tolist() + else: + if data.size == 0: + return None + if data.size == 1: + return data.item() + elif data.size > 1: + return data.tolist() return data @@ -52,7 +58,7 @@ def xarray_to_datacube(data): axis = None if isinstance(data.coords[c].values[0], np.datetime64): type = "temporal" - values = [iso_datetime(date) for date in data.coords[c].values] + values = [datetime_to_isostr(date) for date in data.coords[c].values] else: values = data.coords[c].values.tolist() if c == "x": # todo: non-standardized @@ -77,7 +83,11 @@ def xarray_to_datacube(data): return cube -def iso_datetime(dt): +def isostr_to_datetime(dt): + return parse(dt) + + +def datetime_to_isostr(dt): # Convert numpy.datetime64 to timestamp (in seconds) timestamp = dt.astype("datetime64[s]").astype(int) # Create a datetime object from the timestamp diff --git a/src/openeo_test_suite/lib/process_runner/vito.py b/src/openeo_test_suite/lib/process_runner/vito.py index c759380..c11d671 100644 --- a/src/openeo_test_suite/lib/process_runner/vito.py +++ b/src/openeo_test_suite/lib/process_runner/vito.py @@ -22,7 +22,7 @@ def execute(self, id, arguments): def encode_datacube(self, data): return datacube_to_xarray(data) - def decode_data(self, data): - data = numpy_to_native(data) + def decode_data(self, data, expected): + data = numpy_to_native(data, expected) data = xarray_to_datacube(data) return data diff --git a/src/openeo_test_suite/tests/processes/processing/test_example.py b/src/openeo_test_suite/tests/processes/processing/test_example.py index f58aa54..a106039 100644 --- a/src/openeo_test_suite/tests/processes/processing/test_example.py +++ b/src/openeo_test_suite/tests/processes/processing/test_example.py @@ -1,6 +1,7 @@ import math import warnings from pathlib import Path, posixpath +from openeo_test_suite.lib.process_runner.util import isostr_to_datetime import json5 import pytest @@ -116,12 +117,12 @@ def prepare_arguments(arguments, process_id, connection, file): arg = connection.encode_labeled_array(arg) # datacubes elif arg["type"] == "datacube": - if "data" in arg: - arg["data"] = load_datacube(arg) arg = connection.encode_datacube(arg) elif "process_graph" in arg: arg = connection.encode_process_graph(arg, process_id, name) + arg = connection.encode_data(arg) + if connection.is_json_only(): check_non_json_values(arg) @@ -130,15 +131,32 @@ def prepare_arguments(arguments, process_id, connection, file): return arguments -def load_datacube(cube): - if isinstance(cube["data"], str): - path = posixpath.join(cube["path"], cube["data"]) - if path.endswith(".nc"): - return xr.open_dataarray(path) +def prepare_results(example, result = None): + # go through the example and result recursively and convert datetimes to iso strings + # could be used for more conversions in the future... + + if isinstance(example, dict): + if "type" in example and example["type"] == "datetime": + example = isostr_to_datetime(example["value"]) + try: + result = isostr_to_datetime(result) + except: + pass else: - raise Exception("Datacubes from non-netCDF files not implemented yet") - else: - return cube["data"] + for key in example: + if key not in result: + (example[key],) = prepare_results(example[key]) + else: + (example[key], result[key]) = prepare_results(example[key], result[key]) + + elif isinstance(example, list): + for i in range(len(example)): + if i >= len(result): + (example[i],) = prepare_results(example[i]) + else: + (example[i], result[i]) = prepare_results(example[i], result[i]) + + return (example, result) def load_ref(ref, file): @@ -170,7 +188,7 @@ def check_non_json_values(value): def check_exception(example, result): - assert isinstance(result, Exception) + assert isinstance(result, Exception), "Excpected an exception, but got {}".format(result) if isinstance(example["throws"], str): if result.__class__.__name__ != example["throws"]: warnings.warn( @@ -183,13 +201,18 @@ def check_exception(example, result): def check_return_value(example, result, connection): - assert not isinstance(result, Exception) + assert not isinstance(result, Exception), "Unexpected exception: {} ".format(str(result)) # handle custom types of data - result = connection.decode_data(result) + result = connection.decode_data(result, example["returns"]) + + # decode special types (currently mostly datetimes) + (example["returns"], result) = prepare_results(example["returns"], result) + + delta = example["delta"] if "delta" in example else 0.0000000001 if isinstance(example["returns"], dict): - assert isinstance(result, dict) + assert isinstance(result, dict), "Expected a dict but got {}".format(type(result)) exclude_regex_paths = [] exclude_paths = [] ignore_order_func = None @@ -205,24 +228,34 @@ def check_return_value(example, result, connection): exclude_paths.append("root['data']") ignore_order_func = lambda level: "dimensions" in level.path() - assert {} == DeepDiff( + diff = DeepDiff( example["returns"], result, - significant_digits=10, # todo + math_epsilon=delta, ignore_numeric_type_changes=True, exclude_paths=exclude_paths, exclude_regex_paths=exclude_regex_paths, ignore_order_func=ignore_order_func, ) + assert {} == diff, "Differences: {}".format(str(diff)) + elif isinstance(example["returns"], list): + assert isinstance(result, list), "Expected a list but got {}".format(type(result)) + diff = DeepDiff( + example["returns"], + result, + math_epsilon=delta, + ignore_numeric_type_changes=True, + ) + assert {} == diff, "Differences: {}".format(str(diff)) elif isinstance(example["returns"], float) and math.isnan(example["returns"]): - assert math.isnan(result) + assert math.isnan(result), "Got {} instead of NaN".format(result) elif isinstance(example["returns"], float) or isinstance(example["returns"], int): msg = "Expected a numerical result but got {} of type {}".format( result, type(result) ) assert isinstance(result, float) or isinstance(result, int), msg + assert not math.isnan(result), "Got unexpected NaN as result" # handle numerical data with a delta - delta = example["delta"] if "delta" in example else 0.0000000001 assert result == pytest.approx(example["returns"], delta) else: msg = "Expected {} but got {}".format(example["returns"], result)