From 8a298349a950e194075d3b3df93ddfc451c6c8c1 Mon Sep 17 00:00:00 2001 From: Matthias Mohr Date: Wed, 13 Dec 2023 15:09:43 +0100 Subject: [PATCH 01/11] Update individual process testing --- assets/processes | 2 +- .../processes/processing/test_example.py | 46 ++++++++++++------- 2 files changed, 31 insertions(+), 17 deletions(-) diff --git a/assets/processes b/assets/processes index 5e3ce04..5c1cf62 160000 --- a/assets/processes +++ b/assets/processes @@ -1 +1 @@ -Subproject commit 5e3ce04dbe77e7073beac388f6d629076e65897f +Subproject commit 5c1cf62b15152406d926ce6e245f82e02620be5a diff --git a/src/openeo_test_suite/tests/processes/processing/test_example.py b/src/openeo_test_suite/tests/processes/processing/test_example.py index 8b224f3..f58aa54 100644 --- a/src/openeo_test_suite/tests/processes/processing/test_example.py +++ b/src/openeo_test_suite/tests/processes/processing/test_example.py @@ -11,6 +11,16 @@ examples_path = "assets/processes/tests/*.json5" +def get_level(data, test): + if "level" in test: + level = test["level"] + elif "level" in data: + level = data["level"] + else: + level = "L4" + return level + + def get_examples(): examples = [] package_root_folder = Path(__file__).parents[5] @@ -21,13 +31,7 @@ def get_examples(): with file.open() as f: data = json5.load(f) for test in data["tests"]: - if "level" in test: - level = test["level"] - elif "level" in data: - level = data["level"] - else: - level = "L4" - + level = get_level(data, test) examples.append([id, test, file, level]) except Exception as e: warnings.warn("Failed to load {} due to {}".format(file, e)) @@ -62,11 +66,7 @@ def test_process(connection, process_levels, processes, id, example, file, level try: connection.describe_process(pid) except: - pytest.skip( - "Test requires additional process {} which is not available".format( - pid - ) - ) + pytest.skip("Test requires missing process {}".format(pid)) # prepare the arguments from test JSON encoding to internal backend representations # or skip if not supported by the test runner @@ -190,15 +190,29 @@ def check_return_value(example, result, connection): if isinstance(example["returns"], dict): assert isinstance(result, dict) + exclude_regex_paths = [] + exclude_paths = [] + ignore_order_func = None + if "type" in example["returns"] and example["returns"]["type"] == "datacube": + # todo: non-standardized + exclude_regex_paths.append( + r"root\['dimensions'\]\[\d+\]\['reference_system'\]" + ) + # todo: non-standardized + exclude_paths.append("root['nodata']") + # ignore data if operation is not changing data + if example["returns"]["data"] is None: + exclude_paths.append("root['data']") + ignore_order_func = lambda level: "dimensions" in level.path() + assert {} == DeepDiff( example["returns"], result, significant_digits=10, # todo ignore_numeric_type_changes=True, - exclude_paths=["root['nodata']"], # todo: non-standardized - exclude_regex_paths=[ - r"root\['dimensions'\]\[\d+\]\['reference_system'\]" # todo: non-standardized - ], + exclude_paths=exclude_paths, + exclude_regex_paths=exclude_regex_paths, + ignore_order_func=ignore_order_func, ) elif isinstance(example["returns"], float) and math.isnan(example["returns"]): assert math.isnan(result) From 7a148239859c7a856bc1a82e3b8a0b7e1a916c88 Mon Sep 17 00:00:00 2001 From: Matthias Mohr Date: Wed, 13 Dec 2023 15:10:02 +0100 Subject: [PATCH 02/11] Ignore non-related deprecation warnings --- assets/processes | 2 +- pyproject.toml | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/assets/processes b/assets/processes index 5c1cf62..2cb3a61 160000 --- a/assets/processes +++ b/assets/processes @@ -1 +1 @@ -Subproject commit 5c1cf62b15152406d926ce6e245f82e02620be5a +Subproject commit 2cb3a61b46ebb9b9c956bedcc9eeec3529626149 diff --git a/pyproject.toml b/pyproject.toml index 4bdc767..0ea4494 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,3 +32,6 @@ Dask = [ testpaths = [ "src/openeo_test_suite/tests", ] +filterwarnings = [ + "ignore:(pkg_resources|jsonschema.RefResolver):DeprecationWarning", +] From 377e367da03c3d1ce2b57aa8bf3b18bfe09e6f8d Mon Sep 17 00:00:00 2001 From: Matthias Mohr Date: Thu, 14 Dec 2023 14:01:10 +0100 Subject: [PATCH 03/11] Test improvements --- assets/processes | 2 +- pyproject.toml | 1 + .../lib/process_runner/base.py | 13 +++- .../lib/process_runner/dask.py | 4 +- .../lib/process_runner/util.py | 22 ++++-- .../lib/process_runner/vito.py | 4 +- .../processes/processing/test_example.py | 69 ++++++++++++++----- 7 files changed, 84 insertions(+), 31 deletions(-) diff --git a/assets/processes b/assets/processes index 2cb3a61..71f06f0 160000 --- a/assets/processes +++ b/assets/processes @@ -1 +1 @@ -Subproject commit 2cb3a61b46ebb9b9c956bedcc9eeec3529626149 +Subproject commit 71f06f0c388c0af6a899258402449811c94c9b59 diff --git a/pyproject.toml b/pyproject.toml index 0ea4494..69ee2aa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,6 +15,7 @@ dependencies = [ "xarray>=2023.11.0", "numpy>=1.26.2", "deepdiff>=6.7.1", + "python-dateutil>=2.8.2", ] classifiers = [ "Programming Language :: Python :: 3", diff --git a/src/openeo_test_suite/lib/process_runner/base.py b/src/openeo_test_suite/lib/process_runner/base.py index d87afcc..719b872 100644 --- a/src/openeo_test_suite/lib/process_runner/base.py +++ b/src/openeo_test_suite/lib/process_runner/base.py @@ -48,9 +48,18 @@ def encode_datacube(self, data: Dict) -> Any: """ raise Exception("datacubes not implemented yet") - def decode_data(self, data: Any) -> Any: + def encode_data(self, data: Any) -> Any: """ - Converts data from the internal backend representation to the process test/JSON5 representation + Converts data from the process test/JSON5 representation to the internal backend representation, + excluding datacubes and labeled arrays. + For example: JSON data types to numpy arrays. + openEO process tests specification -> backend + """ + return data + + def decode_data(self, data: Any, expected: Any) -> Any: + """ + Converts data from the internal backend representation to the process test/JSON5 representation. For example: numpy values to JSON data types, labeled-array or datacube to JSON object representation. backend -> openEO process tests specification diff --git a/src/openeo_test_suite/lib/process_runner/dask.py b/src/openeo_test_suite/lib/process_runner/dask.py index e9ce3dd..b508c34 100644 --- a/src/openeo_test_suite/lib/process_runner/dask.py +++ b/src/openeo_test_suite/lib/process_runner/dask.py @@ -61,7 +61,7 @@ def encode_process_graph( def encode_datacube(self, data): return datacube_to_xarray(data) - def decode_data(self, data): - data = numpy_to_native(data) + def decode_data(self, data, expected): + data = numpy_to_native(data, expected) data = xarray_to_datacube(data) return data diff --git a/src/openeo_test_suite/lib/process_runner/util.py b/src/openeo_test_suite/lib/process_runner/util.py index 19e9305..b2d8e8b 100644 --- a/src/openeo_test_suite/lib/process_runner/util.py +++ b/src/openeo_test_suite/lib/process_runner/util.py @@ -1,16 +1,22 @@ +from dateutil.parser import parse from datetime import datetime, timezone import numpy as np import xarray as xr -def numpy_to_native(data): +def numpy_to_native(data, expected): # Converting numpy dtypes to native python types if isinstance(data, np.ndarray) or isinstance(data, np.generic): - if data.size == 1: - return data.item() - elif data.size > 1: + if isinstance(expected, list): return data.tolist() + else: + if data.size == 0: + return None + if data.size == 1: + return data.item() + elif data.size > 1: + return data.tolist() return data @@ -52,7 +58,7 @@ def xarray_to_datacube(data): axis = None if isinstance(data.coords[c].values[0], np.datetime64): type = "temporal" - values = [iso_datetime(date) for date in data.coords[c].values] + values = [datetime_to_isostr(date) for date in data.coords[c].values] else: values = data.coords[c].values.tolist() if c == "x": # todo: non-standardized @@ -77,7 +83,11 @@ def xarray_to_datacube(data): return cube -def iso_datetime(dt): +def isostr_to_datetime(dt): + return parse(dt) + + +def datetime_to_isostr(dt): # Convert numpy.datetime64 to timestamp (in seconds) timestamp = dt.astype("datetime64[s]").astype(int) # Create a datetime object from the timestamp diff --git a/src/openeo_test_suite/lib/process_runner/vito.py b/src/openeo_test_suite/lib/process_runner/vito.py index c759380..c11d671 100644 --- a/src/openeo_test_suite/lib/process_runner/vito.py +++ b/src/openeo_test_suite/lib/process_runner/vito.py @@ -22,7 +22,7 @@ def execute(self, id, arguments): def encode_datacube(self, data): return datacube_to_xarray(data) - def decode_data(self, data): - data = numpy_to_native(data) + def decode_data(self, data, expected): + data = numpy_to_native(data, expected) data = xarray_to_datacube(data) return data diff --git a/src/openeo_test_suite/tests/processes/processing/test_example.py b/src/openeo_test_suite/tests/processes/processing/test_example.py index f58aa54..a106039 100644 --- a/src/openeo_test_suite/tests/processes/processing/test_example.py +++ b/src/openeo_test_suite/tests/processes/processing/test_example.py @@ -1,6 +1,7 @@ import math import warnings from pathlib import Path, posixpath +from openeo_test_suite.lib.process_runner.util import isostr_to_datetime import json5 import pytest @@ -116,12 +117,12 @@ def prepare_arguments(arguments, process_id, connection, file): arg = connection.encode_labeled_array(arg) # datacubes elif arg["type"] == "datacube": - if "data" in arg: - arg["data"] = load_datacube(arg) arg = connection.encode_datacube(arg) elif "process_graph" in arg: arg = connection.encode_process_graph(arg, process_id, name) + arg = connection.encode_data(arg) + if connection.is_json_only(): check_non_json_values(arg) @@ -130,15 +131,32 @@ def prepare_arguments(arguments, process_id, connection, file): return arguments -def load_datacube(cube): - if isinstance(cube["data"], str): - path = posixpath.join(cube["path"], cube["data"]) - if path.endswith(".nc"): - return xr.open_dataarray(path) +def prepare_results(example, result = None): + # go through the example and result recursively and convert datetimes to iso strings + # could be used for more conversions in the future... + + if isinstance(example, dict): + if "type" in example and example["type"] == "datetime": + example = isostr_to_datetime(example["value"]) + try: + result = isostr_to_datetime(result) + except: + pass else: - raise Exception("Datacubes from non-netCDF files not implemented yet") - else: - return cube["data"] + for key in example: + if key not in result: + (example[key],) = prepare_results(example[key]) + else: + (example[key], result[key]) = prepare_results(example[key], result[key]) + + elif isinstance(example, list): + for i in range(len(example)): + if i >= len(result): + (example[i],) = prepare_results(example[i]) + else: + (example[i], result[i]) = prepare_results(example[i], result[i]) + + return (example, result) def load_ref(ref, file): @@ -170,7 +188,7 @@ def check_non_json_values(value): def check_exception(example, result): - assert isinstance(result, Exception) + assert isinstance(result, Exception), "Excpected an exception, but got {}".format(result) if isinstance(example["throws"], str): if result.__class__.__name__ != example["throws"]: warnings.warn( @@ -183,13 +201,18 @@ def check_exception(example, result): def check_return_value(example, result, connection): - assert not isinstance(result, Exception) + assert not isinstance(result, Exception), "Unexpected exception: {} ".format(str(result)) # handle custom types of data - result = connection.decode_data(result) + result = connection.decode_data(result, example["returns"]) + + # decode special types (currently mostly datetimes) + (example["returns"], result) = prepare_results(example["returns"], result) + + delta = example["delta"] if "delta" in example else 0.0000000001 if isinstance(example["returns"], dict): - assert isinstance(result, dict) + assert isinstance(result, dict), "Expected a dict but got {}".format(type(result)) exclude_regex_paths = [] exclude_paths = [] ignore_order_func = None @@ -205,24 +228,34 @@ def check_return_value(example, result, connection): exclude_paths.append("root['data']") ignore_order_func = lambda level: "dimensions" in level.path() - assert {} == DeepDiff( + diff = DeepDiff( example["returns"], result, - significant_digits=10, # todo + math_epsilon=delta, ignore_numeric_type_changes=True, exclude_paths=exclude_paths, exclude_regex_paths=exclude_regex_paths, ignore_order_func=ignore_order_func, ) + assert {} == diff, "Differences: {}".format(str(diff)) + elif isinstance(example["returns"], list): + assert isinstance(result, list), "Expected a list but got {}".format(type(result)) + diff = DeepDiff( + example["returns"], + result, + math_epsilon=delta, + ignore_numeric_type_changes=True, + ) + assert {} == diff, "Differences: {}".format(str(diff)) elif isinstance(example["returns"], float) and math.isnan(example["returns"]): - assert math.isnan(result) + assert math.isnan(result), "Got {} instead of NaN".format(result) elif isinstance(example["returns"], float) or isinstance(example["returns"], int): msg = "Expected a numerical result but got {} of type {}".format( result, type(result) ) assert isinstance(result, float) or isinstance(result, int), msg + assert not math.isnan(result), "Got unexpected NaN as result" # handle numerical data with a delta - delta = example["delta"] if "delta" in example else 0.0000000001 assert result == pytest.approx(example["returns"], delta) else: msg = "Expected {} but got {}".format(example["returns"], result) From ffc17abdad9daa4456a1dd4f675263e2b8e197d4 Mon Sep 17 00:00:00 2001 From: Matthias Mohr Date: Thu, 14 Dec 2023 22:03:23 +0100 Subject: [PATCH 04/11] Special no-data handling --- assets/processes | 2 +- .../lib/process_runner/base.py | 6 ++ .../lib/process_runner/dask.py | 7 ++ .../lib/process_runner/vito.py | 3 + .../processes/processing/test_example.py | 84 +++++++++++-------- 5 files changed, 66 insertions(+), 36 deletions(-) diff --git a/assets/processes b/assets/processes index 71f06f0..edc7e40 160000 --- a/assets/processes +++ b/assets/processes @@ -1 +1 @@ -Subproject commit 71f06f0c388c0af6a899258402449811c94c9b59 +Subproject commit edc7e40aa45a6fdf92505b3635da0a33101f85fb diff --git a/src/openeo_test_suite/lib/process_runner/base.py b/src/openeo_test_suite/lib/process_runner/base.py index 719b872..df675f3 100644 --- a/src/openeo_test_suite/lib/process_runner/base.py +++ b/src/openeo_test_suite/lib/process_runner/base.py @@ -72,3 +72,9 @@ def is_json_only(self) -> bool: If True, the runner will skip all tests that contain non JSON values such as infinity and NaN. """ return False + + def get_nodata_value(self) -> Any: + """ + Returns the nodata value of the backend. + """ + return None \ No newline at end of file diff --git a/src/openeo_test_suite/lib/process_runner/dask.py b/src/openeo_test_suite/lib/process_runner/dask.py index b508c34..b20a8c0 100644 --- a/src/openeo_test_suite/lib/process_runner/dask.py +++ b/src/openeo_test_suite/lib/process_runner/dask.py @@ -24,6 +24,10 @@ def create_process_registry(): ) ] + # not sure why this is needed + from openeo_processes_dask.process_implementations.math import e + processes_from_module.append(e) + specs_module = importlib.import_module("openeo_processes_dask.specs") specs = { func.__name__: getattr(specs_module, func.__name__) @@ -65,3 +69,6 @@ def decode_data(self, data, expected): data = numpy_to_native(data, expected) data = xarray_to_datacube(data) return data + + def get_nodata_value(self): + return float('nan') diff --git a/src/openeo_test_suite/lib/process_runner/vito.py b/src/openeo_test_suite/lib/process_runner/vito.py index c11d671..7cf5ce8 100644 --- a/src/openeo_test_suite/lib/process_runner/vito.py +++ b/src/openeo_test_suite/lib/process_runner/vito.py @@ -26,3 +26,6 @@ def decode_data(self, data, expected): data = numpy_to_native(data, expected) data = xarray_to_datacube(data) return data + + def get_nodata_value(self): + return float('nan') diff --git a/src/openeo_test_suite/tests/processes/processing/test_example.py b/src/openeo_test_suite/tests/processes/processing/test_example.py index a106039..3da0d7d 100644 --- a/src/openeo_test_suite/tests/processes/processing/test_example.py +++ b/src/openeo_test_suite/tests/processes/processing/test_example.py @@ -98,63 +98,77 @@ def test_process(connection, process_levels, processes, id, example, file, level elif returns: check_return_value(example, result, connection) else: - pytest.skip("Test doesn't provide an expected result") + pytest.skip("Test for process {} doesn't provide an expected result for arguments: {}".format(id, example["arguments"])) def prepare_arguments(arguments, process_id, connection, file): for name in arguments: - arg = arguments[name] - - # handle external references to files - if isinstance(arg, dict) and "$ref" in arg: - arg = load_ref(arg["$ref"], file) + arguments[name] = prepare_argument(arguments[name], process_id, name, connection, file) + + return arguments - # handle custom types of data - if isinstance(arg, dict): - if "type" in arg: - # labeled arrays - if arg["type"] == "labeled-array": - arg = connection.encode_labeled_array(arg) - # datacubes - elif arg["type"] == "datacube": - arg = connection.encode_datacube(arg) - elif "process_graph" in arg: - arg = connection.encode_process_graph(arg, process_id, name) +def prepare_argument(arg, process_id, name, connection, file): + # handle external references to files + if isinstance(arg, dict) and "$ref" in arg: + arg = load_ref(arg["$ref"], file) - arg = connection.encode_data(arg) + # handle custom types of data + if isinstance(arg, dict): + if "type" in arg: + # labeled arrays + if arg["type"] == "labeled-array": + arg = connection.encode_labeled_array(arg) + # datacubes + elif arg["type"] == "datacube": + arg = connection.encode_datacube(arg) + # nodata-values + elif arg["type"] == "nodata": + arg = connection.get_nodata_value() + elif "process_graph" in arg: + arg = connection.encode_process_graph(arg, process_id, name) + else: + for key in arg: + arg[key] = prepare_argument(arg[key], process_id, name, connection, file) + + elif isinstance(arg, list): + for i in range(len(arg)): + arg[i] = prepare_argument(arg[i], process_id, name, connection, file) - if connection.is_json_only(): - check_non_json_values(arg) + arg = connection.encode_data(arg) - arguments[name] = arg + if connection.is_json_only(): + check_non_json_values(arg) - return arguments + return arg -def prepare_results(example, result = None): +def prepare_results(connection, example, result = None): # go through the example and result recursively and convert datetimes to iso strings # could be used for more conversions in the future... if isinstance(example, dict): - if "type" in example and example["type"] == "datetime": - example = isostr_to_datetime(example["value"]) - try: - result = isostr_to_datetime(result) - except: - pass + if "type" in example: + if example["type"] == "datetime": + example = isostr_to_datetime(example["value"]) + try: + result = isostr_to_datetime(result) + except: + pass + elif example["type"] == "nodata": + example = connection.get_nodata_value() else: for key in example: if key not in result: - (example[key],) = prepare_results(example[key]) + (example[key],) = prepare_results(connection, example[key]) else: - (example[key], result[key]) = prepare_results(example[key], result[key]) + (example[key], result[key]) = prepare_results(connection, example[key], result[key]) elif isinstance(example, list): for i in range(len(example)): if i >= len(result): - (example[i],) = prepare_results(example[i]) + (example[i],) = prepare_results(connection, example[i]) else: - (example[i], result[i]) = prepare_results(example[i], result[i]) + (example[i], result[i]) = prepare_results(connection, example[i], result[i]) return (example, result) @@ -206,8 +220,8 @@ def check_return_value(example, result, connection): # handle custom types of data result = connection.decode_data(result, example["returns"]) - # decode special types (currently mostly datetimes) - (example["returns"], result) = prepare_results(example["returns"], result) + # decode special types (currently mostly datetimes and nodata) + (example["returns"], result) = prepare_results(connection, example["returns"], result) delta = example["delta"] if "delta" in example else 0.0000000001 From f1e6a6dd4a4a6b95325aa2701bc0a4240377fb13 Mon Sep 17 00:00:00 2001 From: Matthias Mohr Date: Fri, 15 Dec 2023 00:07:21 +0100 Subject: [PATCH 05/11] Fix various minor issues in the test suite --- assets/processes | 2 +- .../lib/process_runner/util.py | 9 ++++++- .../processes/processing/test_example.py | 25 +++++++++++-------- 3 files changed, 24 insertions(+), 12 deletions(-) diff --git a/assets/processes b/assets/processes index edc7e40..e83f2cc 160000 --- a/assets/processes +++ b/assets/processes @@ -1 +1 @@ -Subproject commit edc7e40aa45a6fdf92505b3635da0a33101f85fb +Subproject commit e83f2ccb12a191b7ed5846434a5b8f41dc047df4 diff --git a/src/openeo_test_suite/lib/process_runner/util.py b/src/openeo_test_suite/lib/process_runner/util.py index b2d8e8b..a8a6656 100644 --- a/src/openeo_test_suite/lib/process_runner/util.py +++ b/src/openeo_test_suite/lib/process_runner/util.py @@ -1,11 +1,15 @@ from dateutil.parser import parse from datetime import datetime, timezone +import dask import numpy as np import xarray as xr def numpy_to_native(data, expected): + if isinstance(data, dask.array.core.Array): + data = data.compute() + # Converting numpy dtypes to native python types if isinstance(data, np.ndarray) or isinstance(data, np.generic): if isinstance(expected, list): @@ -48,6 +52,9 @@ def datacube_to_xarray(cube): def xarray_to_datacube(data): + if isinstance(data, dask.array.core.Array): + data = xr.DataArray(data.compute()) + if not isinstance(data, xr.DataArray): return data @@ -56,7 +63,7 @@ def xarray_to_datacube(data): type = "bands" values = [] axis = None - if isinstance(data.coords[c].values[0], np.datetime64): + if np.issubdtype(data.coords[c].dtype, np.datetime64): type = "temporal" values = [datetime_to_isostr(date) for date in data.coords[c].values] else: diff --git a/src/openeo_test_suite/tests/processes/processing/test_example.py b/src/openeo_test_suite/tests/processes/processing/test_example.py index 3da0d7d..8cd92b0 100644 --- a/src/openeo_test_suite/tests/processes/processing/test_example.py +++ b/src/openeo_test_suite/tests/processes/processing/test_example.py @@ -92,11 +92,11 @@ def test_process(connection, process_levels, processes, id, example, file, level if isinstance(result, Exception): check_exception(example, result) else: - check_return_value(example, result, connection) + check_return_value(example, result, connection, file) elif throws: check_exception(example, result) elif returns: - check_return_value(example, result, connection) + check_return_value(example, result, connection, file) else: pytest.skip("Test for process {} doesn't provide an expected result for arguments: {}".format(id, example["arguments"])) @@ -142,11 +142,15 @@ def prepare_argument(arg, process_id, name, connection, file): return arg -def prepare_results(connection, example, result = None): +def prepare_results(connection, file, example, result = None): # go through the example and result recursively and convert datetimes to iso strings # could be used for more conversions in the future... if isinstance(example, dict): + # handle external references to files + if isinstance(example, dict) and "$ref" in example: + example = load_ref(example["$ref"], file) + if "type" in example: if example["type"] == "datetime": example = isostr_to_datetime(example["value"]) @@ -159,16 +163,16 @@ def prepare_results(connection, example, result = None): else: for key in example: if key not in result: - (example[key],) = prepare_results(connection, example[key]) + (example[key], _) = prepare_results(connection, file, example[key]) else: - (example[key], result[key]) = prepare_results(connection, example[key], result[key]) + (example[key], result[key]) = prepare_results(connection, file, example[key], result[key]) elif isinstance(example, list): for i in range(len(example)): if i >= len(result): - (example[i],) = prepare_results(connection, example[i]) + (example[i], _) = prepare_results(connection, file, example[i]) else: - (example[i], result[i]) = prepare_results(connection, example[i], result[i]) + (example[i], result[i]) = prepare_results(connection, file, example[i], result[i]) return (example, result) @@ -179,7 +183,6 @@ def load_ref(ref, file): path = posixpath.join(file.parent, ref) with open(path) as f: data = json5.load(f) - data["path"] = path return data except Exception as e: raise Exception("Failed to load external reference {}: {}".format(ref, e)) @@ -214,14 +217,14 @@ def check_exception(example, result): # assert result.__class__.__name__ == example["throws"] -def check_return_value(example, result, connection): +def check_return_value(example, result, connection, file): assert not isinstance(result, Exception), "Unexpected exception: {} ".format(str(result)) # handle custom types of data result = connection.decode_data(result, example["returns"]) # decode special types (currently mostly datetimes and nodata) - (example["returns"], result) = prepare_results(connection, example["returns"], result) + (example["returns"], result) = prepare_results(connection, file, example["returns"], result) delta = example["delta"] if "delta" in example else 0.0000000001 @@ -247,6 +250,7 @@ def check_return_value(example, result, connection): result, math_epsilon=delta, ignore_numeric_type_changes=True, + ignore_nan_inequality=True, exclude_paths=exclude_paths, exclude_regex_paths=exclude_regex_paths, ignore_order_func=ignore_order_func, @@ -259,6 +263,7 @@ def check_return_value(example, result, connection): result, math_epsilon=delta, ignore_numeric_type_changes=True, + ignore_nan_inequality=True, ) assert {} == diff, "Differences: {}".format(str(diff)) elif isinstance(example["returns"], float) and math.isnan(example["returns"]): From 3727ac9d46b2f5eec754086a032f903d7ddbeb1b Mon Sep 17 00:00:00 2001 From: Matthias Mohr Date: Fri, 15 Dec 2023 18:03:46 +0100 Subject: [PATCH 06/11] Refctor datacube object, minor improvements --- assets/processes | 2 +- .../lib/process_runner/base.py | 2 +- .../lib/process_runner/dask.py | 8 ++- .../lib/process_runner/util.py | 30 +++++---- .../lib/process_runner/vito.py | 2 +- .../processes/processing/test_example.py | 65 ++++++++++++------- 6 files changed, 68 insertions(+), 41 deletions(-) diff --git a/assets/processes b/assets/processes index e83f2cc..1982451 160000 --- a/assets/processes +++ b/assets/processes @@ -1 +1 @@ -Subproject commit e83f2ccb12a191b7ed5846434a5b8f41dc047df4 +Subproject commit 1982451a7115bfad41d3fb9153b17a227554053a diff --git a/src/openeo_test_suite/lib/process_runner/base.py b/src/openeo_test_suite/lib/process_runner/base.py index df675f3..8af4d8f 100644 --- a/src/openeo_test_suite/lib/process_runner/base.py +++ b/src/openeo_test_suite/lib/process_runner/base.py @@ -77,4 +77,4 @@ def get_nodata_value(self) -> Any: """ Returns the nodata value of the backend. """ - return None \ No newline at end of file + return None diff --git a/src/openeo_test_suite/lib/process_runner/dask.py b/src/openeo_test_suite/lib/process_runner/dask.py index b20a8c0..4410d8e 100644 --- a/src/openeo_test_suite/lib/process_runner/dask.py +++ b/src/openeo_test_suite/lib/process_runner/dask.py @@ -1,6 +1,7 @@ import importlib import inspect +import dask from openeo_pg_parser_networkx import OpenEOProcessGraph, ProcessRegistry from openeo_pg_parser_networkx.process_registry import Process from openeo_processes_dask.process_implementations.core import process @@ -26,6 +27,7 @@ def create_process_registry(): # not sure why this is needed from openeo_processes_dask.process_implementations.math import e + processes_from_module.append(e) specs_module = importlib.import_module("openeo_processes_dask.specs") @@ -66,9 +68,13 @@ def encode_datacube(self, data): return datacube_to_xarray(data) def decode_data(self, data, expected): + if isinstance(data, dask.array.core.Array): + data = data.compute() + data = numpy_to_native(data, expected) data = xarray_to_datacube(data) + return data def get_nodata_value(self): - return float('nan') + return float("nan") diff --git a/src/openeo_test_suite/lib/process_runner/util.py b/src/openeo_test_suite/lib/process_runner/util.py index a8a6656..cfe7b4d 100644 --- a/src/openeo_test_suite/lib/process_runner/util.py +++ b/src/openeo_test_suite/lib/process_runner/util.py @@ -1,15 +1,11 @@ -from dateutil.parser import parse from datetime import datetime, timezone -import dask import numpy as np import xarray as xr +from dateutil.parser import parse def numpy_to_native(data, expected): - if isinstance(data, dask.array.core.Array): - data = data.compute() - # Converting numpy dtypes to native python types if isinstance(data, np.ndarray) or isinstance(data, np.generic): if isinstance(expected, list): @@ -28,7 +24,8 @@ def numpy_to_native(data, expected): def datacube_to_xarray(cube): coords = [] crs = None - for dim in cube["dimensions"]: + for name in cube["order"]: + dim = cube["dimensions"][name] if dim["type"] == "temporal": # date replace for older Python versions that don't support ISO parsing (only available since 3.11) values = [ @@ -41,7 +38,7 @@ def datacube_to_xarray(cube): else: values = dim["values"] - coords.append((dim["name"], values)) + coords.append((name, values)) da = xr.DataArray(cube["data"], coords=coords) if crs is not None: @@ -52,13 +49,12 @@ def datacube_to_xarray(cube): def xarray_to_datacube(data): - if isinstance(data, dask.array.core.Array): - data = xr.DataArray(data.compute()) - if not isinstance(data, xr.DataArray): return data - dims = [] + order = list(data.dims) + + dims = {} for c in data.coords: type = "bands" values = [] @@ -75,14 +71,20 @@ def xarray_to_datacube(data): type = "spatial" axis = "y" - dim = {"name": c, "type": type, "values": values} + dim = {"type": type, "values": values} if axis is not None: dim["axis"] = axis if "crs" in data.attrs: dim["reference_system"] = data.attrs["crs"] # todo: non-standardized - dims.append(dim) - cube = {"type": "datacube", "dimensions": dims, "data": data.values.tolist()} + dims[c] = dim + + cube = { + "type": "datacube", + "order": order, + "dimensions": dims, + "data": data.values.tolist(), + } if "nodata" in data.attrs: cube["nodata"] = data.attrs["nodata"] # todo: non-standardized diff --git a/src/openeo_test_suite/lib/process_runner/vito.py b/src/openeo_test_suite/lib/process_runner/vito.py index 7cf5ce8..7e3da83 100644 --- a/src/openeo_test_suite/lib/process_runner/vito.py +++ b/src/openeo_test_suite/lib/process_runner/vito.py @@ -28,4 +28,4 @@ def decode_data(self, data, expected): return data def get_nodata_value(self): - return float('nan') + return float("nan") diff --git a/src/openeo_test_suite/tests/processes/processing/test_example.py b/src/openeo_test_suite/tests/processes/processing/test_example.py index 8cd92b0..d5389d3 100644 --- a/src/openeo_test_suite/tests/processes/processing/test_example.py +++ b/src/openeo_test_suite/tests/processes/processing/test_example.py @@ -1,13 +1,14 @@ import math import warnings from pathlib import Path, posixpath -from openeo_test_suite.lib.process_runner.util import isostr_to_datetime import json5 import pytest import xarray as xr from deepdiff import DeepDiff +from openeo_test_suite.lib.process_runner.util import isostr_to_datetime + # glob path to the test files examples_path = "assets/processes/tests/*.json5" @@ -49,11 +50,7 @@ def test_process(connection, process_levels, processes, id, example, file, level ) ) elif len(processes) > 0 and id not in processes: - pytest.skip( - "Skipping process {} because it is not in the specified processes: {}".format( - id, ", ".join(processes) - ) - ) + pytest.skip("Skipping process {} because it is not in the specified processes".format(id)) # check whether the process is available try: @@ -98,15 +95,22 @@ def test_process(connection, process_levels, processes, id, example, file, level elif returns: check_return_value(example, result, connection, file) else: - pytest.skip("Test for process {} doesn't provide an expected result for arguments: {}".format(id, example["arguments"])) + pytest.skip( + "Test for process {} doesn't provide an expected result for arguments: {}".format( + id, example["arguments"] + ) + ) def prepare_arguments(arguments, process_id, connection, file): for name in arguments: - arguments[name] = prepare_argument(arguments[name], process_id, name, connection, file) - + arguments[name] = prepare_argument( + arguments[name], process_id, name, connection, file + ) + return arguments + def prepare_argument(arg, process_id, name, connection, file): # handle external references to files if isinstance(arg, dict) and "$ref" in arg: @@ -128,8 +132,10 @@ def prepare_argument(arg, process_id, name, connection, file): arg = connection.encode_process_graph(arg, process_id, name) else: for key in arg: - arg[key] = prepare_argument(arg[key], process_id, name, connection, file) - + arg[key] = prepare_argument( + arg[key], process_id, name, connection, file + ) + elif isinstance(arg, list): for i in range(len(arg)): arg[i] = prepare_argument(arg[i], process_id, name, connection, file) @@ -142,7 +148,7 @@ def prepare_argument(arg, process_id, name, connection, file): return arg -def prepare_results(connection, file, example, result = None): +def prepare_results(connection, file, example, result=None): # go through the example and result recursively and convert datetimes to iso strings # could be used for more conversions in the future... @@ -150,7 +156,7 @@ def prepare_results(connection, file, example, result = None): # handle external references to files if isinstance(example, dict) and "$ref" in example: example = load_ref(example["$ref"], file) - + if "type" in example: if example["type"] == "datetime": example = isostr_to_datetime(example["value"]) @@ -165,14 +171,18 @@ def prepare_results(connection, file, example, result = None): if key not in result: (example[key], _) = prepare_results(connection, file, example[key]) else: - (example[key], result[key]) = prepare_results(connection, file, example[key], result[key]) - + (example[key], result[key]) = prepare_results( + connection, file, example[key], result[key] + ) + elif isinstance(example, list): for i in range(len(example)): if i >= len(result): (example[i], _) = prepare_results(connection, file, example[i]) else: - (example[i], result[i]) = prepare_results(connection, file, example[i], result[i]) + (example[i], result[i]) = prepare_results( + connection, file, example[i], result[i] + ) return (example, result) @@ -205,7 +215,9 @@ def check_non_json_values(value): def check_exception(example, result): - assert isinstance(result, Exception), "Excpected an exception, but got {}".format(result) + assert isinstance(result, Exception), "Excpected an exception, but got {}".format( + result + ) if isinstance(example["throws"], str): if result.__class__.__name__ != example["throws"]: warnings.warn( @@ -218,32 +230,37 @@ def check_exception(example, result): def check_return_value(example, result, connection, file): - assert not isinstance(result, Exception), "Unexpected exception: {} ".format(str(result)) + assert not isinstance(result, Exception), "Unexpected exception: {} ".format( + str(result) + ) # handle custom types of data result = connection.decode_data(result, example["returns"]) # decode special types (currently mostly datetimes and nodata) - (example["returns"], result) = prepare_results(connection, file, example["returns"], result) + (example["returns"], result) = prepare_results( + connection, file, example["returns"], result + ) delta = example["delta"] if "delta" in example else 0.0000000001 if isinstance(example["returns"], dict): - assert isinstance(result, dict), "Expected a dict but got {}".format(type(result)) + assert isinstance(result, dict), "Expected a dict but got {}".format( + type(result) + ) exclude_regex_paths = [] exclude_paths = [] ignore_order_func = None if "type" in example["returns"] and example["returns"]["type"] == "datacube": # todo: non-standardized exclude_regex_paths.append( - r"root\['dimensions'\]\[\d+\]\['reference_system'\]" + r"root\['dimensions'\]\[[^\]]+\]\['reference_system'\]" ) # todo: non-standardized exclude_paths.append("root['nodata']") # ignore data if operation is not changing data if example["returns"]["data"] is None: exclude_paths.append("root['data']") - ignore_order_func = lambda level: "dimensions" in level.path() diff = DeepDiff( example["returns"], @@ -257,7 +274,9 @@ def check_return_value(example, result, connection, file): ) assert {} == diff, "Differences: {}".format(str(diff)) elif isinstance(example["returns"], list): - assert isinstance(result, list), "Expected a list but got {}".format(type(result)) + assert isinstance(result, list), "Expected a list but got {}".format( + type(result) + ) diff = DeepDiff( example["returns"], result, From 78f28852e7871e51064810a6bd473ece182cf885 Mon Sep 17 00:00:00 2001 From: Matthias Mohr Date: Sat, 16 Dec 2023 00:00:23 +0100 Subject: [PATCH 07/11] Added option to skip experimental processes --- assets/processes | 2 +- src/openeo_test_suite/tests/conftest.py | 27 ++++++++++++ .../processes/processing/test_example.py | 43 +++++++++++++------ 3 files changed, 57 insertions(+), 15 deletions(-) diff --git a/assets/processes b/assets/processes index 1982451..be0c7d4 160000 --- a/assets/processes +++ b/assets/processes @@ -1 +1 @@ -Subproject commit 1982451a7115bfad41d3fb9153b17a227554053a +Subproject commit be0c7d45916f817ad6aa15cf2a0a1765e000cdbe diff --git a/src/openeo_test_suite/tests/conftest.py b/src/openeo_test_suite/tests/conftest.py index 7f86a49..aa8798a 100644 --- a/src/openeo_test_suite/tests/conftest.py +++ b/src/openeo_test_suite/tests/conftest.py @@ -1,5 +1,7 @@ +import argparse import logging import os +from distutils.util import strtobool import openeo import pytest @@ -14,6 +16,13 @@ def pytest_addoption(parser): default=None, help="The openEO backend URL to connect to.", ) + parser.addoption( + "--experimental", + type=bool, + action=argparse.BooleanOptionalAction, + default=False, + help="Run tests for experimental functionality or not. By default the tests will be skipped.", + ) parser.addoption( "--process-levels", action="store", @@ -55,6 +64,24 @@ def backend_url(request) -> str: return url +@pytest.fixture(scope="session") +def skip_experimental(request) -> str: + """ + Fixture to determine whether experimental functionality should be tested or not. + """ + # TODO: also support getting it from a config file? + if request.config.getoption("--experimental"): + skip = False + elif "OPENEO_EXPERIMENTAL" in os.environ: + skip = bool(strtobool(os.environ["OPENEO_EXPERIMENTAL"])) + else: + skip = True + + _log.info(f"Skip experimental functionality {skip!r}") + + return skip + + @pytest.fixture def auto_authenticate() -> bool: """ diff --git a/src/openeo_test_suite/tests/processes/processing/test_example.py b/src/openeo_test_suite/tests/processes/processing/test_example.py index d5389d3..517e2bf 100644 --- a/src/openeo_test_suite/tests/processes/processing/test_example.py +++ b/src/openeo_test_suite/tests/processes/processing/test_example.py @@ -13,13 +13,13 @@ examples_path = "assets/processes/tests/*.json5" -def get_level(data, test): - if "level" in test: - level = test["level"] - elif "level" in data: - level = data["level"] +def get_prop(prop, data, test, default=None): + if prop in test: + level = test[prop] + elif prop in data: + level = data[prop] else: - level = "L4" + level = default return level @@ -33,24 +33,41 @@ def get_examples(): with file.open() as f: data = json5.load(f) for test in data["tests"]: - level = get_level(data, test) - examples.append([id, test, file, level]) + level = get_prop("level", data, test, "L4") + experimental = get_prop("experimental", data, test, False) + examples.append([id, test, file, level, experimental]) except Exception as e: warnings.warn("Failed to load {} due to {}".format(file, e)) return examples -@pytest.mark.parametrize("id,example,file,level", get_examples()) -def test_process(connection, process_levels, processes, id, example, file, level): - if len(process_levels) > 0 and level not in process_levels: +@pytest.mark.parametrize("id,example,file,level, experimental", get_examples()) +def test_process( + connection, + skip_experimental, + process_levels, + processes, + id, + example, + file, + level, + experimental, +): + if skip_experimental and experimental: + pytest.skip("Skipping experimental process {}".format(id)) + elif len(process_levels) > 0 and level not in process_levels: pytest.skip( "Skipping process {} because {} is not in the specified levels: {}".format( id, level, ", ".join(process_levels) ) ) elif len(processes) > 0 and id not in processes: - pytest.skip("Skipping process {} because it is not in the specified processes".format(id)) + pytest.skip( + "Skipping process {} because it is not in the specified processes".format( + id + ) + ) # check whether the process is available try: @@ -73,8 +90,6 @@ def test_process(connection, process_levels, processes, id, example, file, level except Exception as e: pytest.skip(str(e)) - # todo: handle experimental processes (warning instead of error?) - experimental = example["experimental"] if "experimental" in example else False throws = bool(example["throws"]) if "throws" in example else False returns = "returns" in example From 08b8530441f5b801c80c210eb5df327e1f5f200d Mon Sep 17 00:00:00 2001 From: Matthias Mohr Date: Tue, 19 Dec 2023 12:37:48 +0100 Subject: [PATCH 08/11] Update tests --- assets/processes | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/processes b/assets/processes index be0c7d4..3125cbf 160000 --- a/assets/processes +++ b/assets/processes @@ -1 +1 @@ -Subproject commit be0c7d45916f817ad6aa15cf2a0a1765e000cdbe +Subproject commit 3125cbf439f1b370ed283754018a389aea0b7b9c From 239766b52ccc273d9ce0c4b8cc6195ac478a3866 Mon Sep 17 00:00:00 2001 From: Matthias Mohr Date: Tue, 19 Dec 2023 12:50:05 +0100 Subject: [PATCH 09/11] Move fixtures to core to allow usage in workflows and process metadata, README --- src/openeo_test_suite/tests/conftest.py | 38 +++++++++++++++++++ .../tests/processes/README.md | 7 +++- .../tests/processes/processing/conftest.py | 38 ------------------- 3 files changed, 43 insertions(+), 40 deletions(-) diff --git a/src/openeo_test_suite/tests/conftest.py b/src/openeo_test_suite/tests/conftest.py index aa8798a..f629a6c 100644 --- a/src/openeo_test_suite/tests/conftest.py +++ b/src/openeo_test_suite/tests/conftest.py @@ -82,6 +82,44 @@ def skip_experimental(request) -> str: return skip +@pytest.fixture(scope="session") +def process_levels(request): + """ + Fixture to get the desired openEO profiles levels. + """ + levels_str = "" + # TODO: also support getting it from a config file? + if request.config.getoption("--process-levels"): + levels_str = request.config.getoption("--process-levels") + elif "OPENEO_PROCESS_LEVELS" in os.environ: + levels_str = os.environ["OPENEO_PROCESS_LEVELS"] + + if isinstance(levels_str, str) and len(levels_str) > 0: + _log.info(f"Testing process levels {levels_str!r}") + return list(map(lambda l: l.strip(), levels_str.split(","))) + else: + return [] + + +@pytest.fixture(scope="session") +def processes(request): + """ + Fixture to get the desired profiles to test against. + """ + processes_str = "" + # TODO: also support getting it from a config file? + if request.config.getoption("--processes"): + processes_str = request.config.getoption("--processes") + elif "OPENEO_PROCESSES" in os.environ: + processes_str = os.environ["OPENEO_PROCESSES"] + + if isinstance(processes_str, str) and len(processes_str) > 0: + _log.info(f"Testing processes {processes_str!r}") + return list(map(lambda p: p.strip(), processes_str.split(","))) + else: + return [] + + @pytest.fixture def auto_authenticate() -> bool: """ diff --git a/src/openeo_test_suite/tests/processes/README.md b/src/openeo_test_suite/tests/processes/README.md index f20fb12..f1f191e 100644 --- a/src/openeo_test_suite/tests/processes/README.md +++ b/src/openeo_test_suite/tests/processes/README.md @@ -9,13 +9,16 @@ ## Individual Process Testing -Examples: +### Examples - `pytest --openeo-backend-url=https://openeo.cloud --processes=min,max` - `pytest --runner=vito --process-levels=L1,L2,L2A` - `pytest --runner=dask` +- `pytest src/openeo_test_suite/tests/processes/processing/test_example.py --runner=dask` -Parameters: +### Parameters + +Specify `src/openeo_test_suite/tests/processes/processing/test_example.py` to only run individual process tests. - `--runner`: The execution engine. One of: - `vito` (needs being installed) diff --git a/src/openeo_test_suite/tests/processes/processing/conftest.py b/src/openeo_test_suite/tests/processes/processing/conftest.py index 68cbf84..3b81dba 100644 --- a/src/openeo_test_suite/tests/processes/processing/conftest.py +++ b/src/openeo_test_suite/tests/processes/processing/conftest.py @@ -27,44 +27,6 @@ def runner(request) -> str: return runner -@pytest.fixture(scope="session") -def process_levels(request): - """ - Fixture to get the desired openEO profiles levels. - """ - levels_str = "" - # TODO: also support getting it from a config file? - if request.config.getoption("--process-levels"): - levels_str = request.config.getoption("--process-levels") - elif "OPENEO_PROCESS_LEVELS" in os.environ: - levels_str = os.environ["OPENEO_PROCESS_LEVELS"] - - if isinstance(levels_str, str) and len(levels_str) > 0: - _log.info(f"Testing process levels {levels_str!r}") - return list(map(lambda l: l.strip(), levels_str.split(","))) - else: - return [] - - -@pytest.fixture(scope="session") -def processes(request): - """ - Fixture to get the desired profiles to test against. - """ - processes_str = "" - # TODO: also support getting it from a config file? - if request.config.getoption("--processes"): - processes_str = request.config.getoption("--processes") - elif "OPENEO_PROCESSES" in os.environ: - processes_str = os.environ["OPENEO_PROCESSES"] - - if isinstance(processes_str, str) and len(processes_str) > 0: - _log.info(f"Testing processes {processes_str!r}") - return list(map(lambda p: p.strip(), processes_str.split(","))) - else: - return [] - - @pytest.fixture def connection( backend_url: str, runner: str, auto_authenticate: bool, capfd From b10b83a5454e1fdd313440a591b6b513dfd16eb6 Mon Sep 17 00:00:00 2001 From: Matthias Mohr Date: Fri, 22 Dec 2023 14:16:10 +0100 Subject: [PATCH 10/11] Add .geojson support for $ref --- .../tests/processes/processing/test_example.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/openeo_test_suite/tests/processes/processing/test_example.py b/src/openeo_test_suite/tests/processes/processing/test_example.py index 517e2bf..d788bdd 100644 --- a/src/openeo_test_suite/tests/processes/processing/test_example.py +++ b/src/openeo_test_suite/tests/processes/processing/test_example.py @@ -203,7 +203,7 @@ def prepare_results(connection, file, example, result=None): def load_ref(ref, file): - if ref.endswith(".json") or ref.endswith(".json5"): + if ref.endswith(".json") or ref.endswith(".json5") or ref.endswith(".geojson"): try: path = posixpath.join(file.parent, ref) with open(path) as f: From ee6898e6d4b7c7d4665f1fcb96f93baa519e5077 Mon Sep 17 00:00:00 2001 From: Matthias Mohr Date: Fri, 22 Dec 2023 16:24:15 +0100 Subject: [PATCH 11/11] Update submodule --- assets/processes | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/processes b/assets/processes index 3125cbf..0e979e4 160000 --- a/assets/processes +++ b/assets/processes @@ -1 +1 @@ -Subproject commit 3125cbf439f1b370ed283754018a389aea0b7b9c +Subproject commit 0e979e4307b9aad1855eb3a87f7f53a3247057a9