From e344c4c105e8d5b13e16d99bc19baeb92ca79cf0 Mon Sep 17 00:00:00 2001 From: Stefaan Lippens Date: Thu, 22 Apr 2021 13:53:54 +0200 Subject: [PATCH] Issue #204: Raises warning when using a CRS for the geometry of `aggregate_spatial` and `mask_polygon` Harmonize geometry handling in `aggregate_spatial` and `mask_polygon`, including handling of non-standard crs and whitelisting of allowed GeoJSON types Raises warning when using a CRS for the geometry of `aggregate_spatial` and `mask_polygon` Also mark this more clearly in the docs Related: #202, Open-EO/openeo-processes#235, Open-EO/openeo-processes#53 --- CHANGELOG.md | 4 + openeo/rest/datacube.py | 101 ++++++++------ openeo/rest/imagecollectionclient.py | 10 +- tests/rest/datacube/test_datacube.py | 3 +- tests/rest/datacube/test_datacube100.py | 178 +++++++++++++++++++++++- 5 files changed, 239 insertions(+), 57 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d98fd5cf9..c69cad0fa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +- Allow, but raise warning when specifying a CRS for the geometry passed to `aggregate_spatial` and `mask_polygon`, + which is non-standard/experimental feature, only supported by specific back-ends + ([#204](https://github.com/Open-EO/openeo-python-client/issues/204)) + ### Changed ### Removed diff --git a/openeo/rest/datacube.py b/openeo/rest/datacube.py index f598e65f1..2a4dede1a 100644 --- a/openeo/rest/datacube.py +++ b/openeo/rest/datacube.py @@ -161,6 +161,7 @@ def load_collection( temporal_extent = cls._get_temporal_extent(extent=temporal_extent) arguments = { 'id': collection_id, + # TODO: spatial_extent could also be a "geojson" subtype object, so we might want to allow (and convert) shapely shapes as well here. 'spatial_extent': spatial_extent, 'temporal_extent': temporal_extent, } @@ -634,10 +635,45 @@ def _merge_operator_binary_cubes(self, operator: str, other: 'DataCube', left_ar } )) + def _get_geometry_argument( + self, + geometry: Union[shapely.geometry.base.BaseGeometry, dict, str, pathlib.Path, Parameter], + valid_geojson_types: List[str], + crs: str = None, + ) -> Union[dict, Parameter, PGNode]: + """ + Convert input to a geometry as "geojson" subtype object. + """ + if isinstance(geometry, (str, pathlib.Path)): + # Assumption: `geometry` is path to polygon is a path to vector file at backend. + # TODO #104: `read_vector` is non-standard process. + # TODO: If path exists client side: load it client side? + return PGNode(process_id="read_vector", arguments={"filename": str(geometry)}) + elif isinstance(geometry, Parameter): + return geometry + + if isinstance(geometry, shapely.geometry.base.BaseGeometry): + geometry = mapping(geometry) + if not isinstance(geometry, dict): + raise OpenEoClientException("Invalid geometry argument: {g!r}".format(g=geometry)) + + if geometry.get("type") not in valid_geojson_types: + raise OpenEoClientException("Invalid geometry type {t!r}, must be one of {s}".format( + t=geometry.get("type"), s=valid_geojson_types + )) + if crs: + # TODO: don't warn when the crs is Lon-Lat like EPSG:4326? + warnings.warn("Geometry with non-Lon-Lat CRS {c!r} is only supported by specific back-ends.".format(c=crs)) + # TODO #204 alternative for non-standard CRS in GeoJSON object? + geometry["crs"] = {"type": "name", "properties": {"name": crs}} + return geometry + def aggregate_spatial( self, geometries: Union[shapely.geometry.base.BaseGeometry, dict, str, pathlib.Path, Parameter], - reducer: Union[str, PGNode, typing.Callable] + reducer: Union[str, PGNode, typing.Callable], + crs: str = None, + # TODO arguments: target dimension, context ) -> 'DataCube': """ Aggregates statistics for one or more geometries (e.g. zonal statistics for polygons) @@ -645,24 +681,17 @@ def aggregate_spatial( :param geometries: shapely geometry, GeoJSON dictionary or path to GeoJSON file :param reducer: a callback function that creates a process graph, see :ref:`callbackfunctions` - :return: + :param crs: The spatial reference system of the provided polygon. + By default longitude-latitude (EPSG:4326) is assumed. + + .. note:: this ``crs`` argument is a non-standard/experimental feature, only supported by specific back-ends. + See https://github.com/Open-EO/openeo-processes/issues/235 for details. """ - # TODO #125 arguments: target dimension, context - if isinstance(geometries, (str, pathlib.Path)): - # polygon is a path to vector file - # TODO this is non-standard process: check capabilities? #104 #40. Load polygon client side otherwise - geometries = PGNode(process_id="read_vector", arguments={"filename": str(geometries)}) - elif isinstance(geometries, shapely.geometry.base.BaseGeometry): - geometries = mapping(geometries) - elif isinstance(geometries, dict) and geometries["type"] in ( - # TODO: support more geometry types? - "Polygon", "MultiPolygon", "GeometryCollection", 'FeatureCollection' - ): - pass - elif isinstance(geometries, Parameter): - pass - else: - raise OpenEoClientException("Invalid `geometries`: {g!r}".format(g=geometries)) + valid_geojson_types = [ + "Point", "MultiPoint", "LineString", "MultiLineString", + "Polygon", "MultiPolygon", "GeometryCollection", "FeatureCollection" + ] + geometries = self._get_geometry_argument(geometries, valid_geojson_types=valid_geojson_types, crs=crs) reducer = self._get_callback(reducer, parent_parameters=["data"]) return self.process(process_id="aggregate_spatial", data=THIS, geometries=geometries, reducer=reducer) @@ -1111,8 +1140,10 @@ def mask(self, mask: 'DataCube' = None, replacement=None) -> 'DataCube': ) def mask_polygon( - self, mask: Union[Polygon, MultiPolygon, str, pathlib.Path] = None, - srs="EPSG:4326", replacement=None, inside: bool = None + self, + mask: Union[shapely.geometry.base.BaseGeometry, dict, str, pathlib.Path, Parameter], + srs: str = None, + replacement=None, inside: bool = None ) -> 'DataCube': """ Applies a polygon mask to a raster data cube. To apply a raster mask use `mask`. @@ -1125,31 +1156,15 @@ def mask_polygon( which defaults to `no data`. :param mask: A polygon, provided as a :class:`shapely.geometry.Polygon` or :class:`shapely.geometry.MultiPolygon`, or a filename pointing to a valid vector file - :param srs: The reference system of the provided polygon, by default this is Lat Lon (EPSG:4326). + :param srs: The spatial reference system of the provided polygon. + By default longitude-latitude (EPSG:4326) is assumed. + + .. note:: this ``srs`` argument is a non-standard/experimental feature, only supported by specific back-ends. + See https://github.com/Open-EO/openeo-processes/issues/235 for details. :param replacement: the value to replace the masked pixels with """ - if isinstance(mask, (str, pathlib.Path)): - # TODO: default to loading file client side? - # TODO: change read_vector to load_uploaded_files https://github.com/Open-EO/openeo-processes/pull/106 - read_vector = self.process( - process_id='read_vector', - arguments={'filename': str(mask)} - ) - mask = {'from_node': read_vector._pg} - elif isinstance(mask, shapely.geometry.base.BaseGeometry): - if mask.area == 0: - raise ValueError("Mask {m!s} has an area of {a!r}".format(m=mask, a=mask.area)) - mask = shapely.geometry.mapping(mask) - mask['crs'] = { - 'type': 'name', - 'properties': {'name': srs} - } - elif isinstance(mask, Parameter): - pass - else: - # Assume mask is already a valid GeoJSON object - assert "type" in mask - + valid_geojson_types = ["Polygon", "MultiPolygon", "GeometryCollection", "Feature", "FeatureCollection"] + mask = self._get_geometry_argument(mask, valid_geojson_types=valid_geojson_types, crs=srs) return self.process( process_id="mask_polygon", arguments=dict_no_none( diff --git a/openeo/rest/imagecollectionclient.py b/openeo/rest/imagecollectionclient.py index a2e17b634..d96385262 100644 --- a/openeo/rest/imagecollectionclient.py +++ b/openeo/rest/imagecollectionclient.py @@ -763,7 +763,7 @@ def linear_scale_range(self, input_min, input_max, output_min, output_max) -> 'I } return self.graph_add_process(process_id, args) - def mask(self, polygon: Union[Polygon, MultiPolygon,str]=None, srs="EPSG:4326", rastermask: 'ImageCollection'=None, + def mask(self, polygon: Union[Polygon, MultiPolygon,str]=None, srs=None, rastermask: 'ImageCollection'=None, replacement=None) -> 'ImageCollection': """ Mask the image collection using either a polygon or a raster mask. @@ -805,12 +805,8 @@ def mask(self, polygon: Union[Polygon, MultiPolygon,str]=None, srs="EPSG:4326", raise ValueError("Mask {m!s} has an area of {a!r}".format(m=polygon, a=polygon.area)) geojson = mapping(polygon) - geojson['crs'] = { - 'type': 'name', - 'properties': { - 'name': srs - } - } + if srs: + geojson['crs'] = {'type': 'name', 'properties': {'name': srs}} mask = geojson new_collection = self elif rastermask is not None: diff --git a/tests/rest/datacube/test_datacube.py b/tests/rest/datacube/test_datacube.py index a18aa1d1e..064face99 100644 --- a/tests/rest/datacube/test_datacube.py +++ b/tests/rest/datacube/test_datacube.py @@ -320,9 +320,8 @@ def test_mask_polygon(s2cube, api_version): assert graph["arguments"] == { "data": {'from_node': 'loadcollection1'}, "mask": { + 'type': 'Polygon', 'coordinates': (((0.0, 0.0), (1.9, 0.0), (1.9, 1.9), (0.0, 1.9), (0.0, 0.0)),), - 'crs': {'properties': {'name': 'EPSG:4326'}, 'type': 'name'}, - 'type': 'Polygon' } } diff --git a/tests/rest/datacube/test_datacube100.py b/tests/rest/datacube/test_datacube100.py index 9c3e6b0a0..c5447a3c8 100644 --- a/tests/rest/datacube/test_datacube100.py +++ b/tests/rest/datacube/test_datacube100.py @@ -156,19 +156,187 @@ def test_filter_bbox_args_and_kwargs_conflict(con100: Connection, args, kwargs, con100.load_collection("S2").filter_bbox(*args, **kwargs) -def test_mask_polygon(con100: Connection): +def test_aggregate_spatial_basic(con100: Connection): img = con100.load_collection("S2") polygon = shapely.geometry.box(0, 0, 1, 1) + masked = img.aggregate_spatial(geometries=polygon, reducer="mean") + assert sorted(masked.graph.keys()) == ["aggregatespatial1", "loadcollection1"] + assert masked.graph["aggregatespatial1"] == { + "process_id": "aggregate_spatial", + "arguments": { + "data": {"from_node": "loadcollection1"}, + "geometries": { + "type": "Polygon", + "coordinates": (((1.0, 0.0), (1.0, 1.0), (0.0, 1.0), (0.0, 0.0), (1.0, 0.0)),), + }, + "reducer": {"process_graph": { + "mean1": {"process_id": "mean", "arguments": {"data": {"from_parameter": "data"}}, "result": True} + }} + }, + "result": True + } + + +@pytest.mark.parametrize(["polygon", "expected_geometries"], [ + ( + shapely.geometry.box(0, 0, 1, 1), + {"type": "Polygon", "coordinates": (((1.0, 0.0), (1.0, 1.0), (0.0, 1.0), (0.0, 0.0), (1.0, 0.0)),)}, + ), + ( + {"type": "Polygon", "coordinates": (((1, 0), (1, 1), (0, 1), (0, 0), (1, 0)),)}, + {"type": "Polygon", "coordinates": (((1, 0), (1, 1), (0, 1), (0, 0), (1, 0)),)}, + ), + ( + shapely.geometry.MultiPolygon([shapely.geometry.box(0, 0, 1, 1)]), + {"type": "MultiPolygon", "coordinates": [(((1.0, 0.0), (1.0, 1.0), (0.0, 1.0), (0.0, 0.0), (1.0, 0.0)),)]}, + ), + ( + shapely.geometry.GeometryCollection([shapely.geometry.box(0, 0, 1, 1)]), + {"type": "GeometryCollection", "geometries": [ + {"type": "Polygon", "coordinates": (((1.0, 0.0), (1.0, 1.0), (0.0, 1.0), (0.0, 0.0), (1.0, 0.0)),)} + ]}, + ), + ( + { + "type": "FeatureCollection", + "features": [ + { + "type": "Feature", "properties": {}, + "geometry": {"type": "Polygon", "coordinates": (((1, 0), (1, 1), (0, 1), (0, 0), (1, 0)),)}, + }, + + ] + }, + { + "type": "FeatureCollection", + "features": [ + { + "type": "Feature", "properties": {}, + "geometry": {"type": "Polygon", "coordinates": (((1, 0), (1, 1), (0, 1), (0, 0), (1, 0)),)}, + }, + + ] + }, + ), +]) +def test_aggregate_spatial_types(con100: Connection, polygon, expected_geometries): + img = con100.load_collection("S2") + masked = img.aggregate_spatial(geometries=polygon, reducer="mean") + assert sorted(masked.graph.keys()) == ["aggregatespatial1", "loadcollection1"] + assert masked.graph["aggregatespatial1"] == { + "process_id": "aggregate_spatial", + "arguments": { + "data": {"from_node": "loadcollection1"}, + "geometries": expected_geometries, + "reducer": {"process_graph": { + "mean1": {"process_id": "mean", "arguments": {"data": {"from_parameter": "data"}}, "result": True} + }} + }, + "result": True + } + + +def test_aggregate_spatial_with_crs(con100: Connection, recwarn): + img = con100.load_collection("S2") + polygon = shapely.geometry.box(0, 0, 1, 1) + masked = img.aggregate_spatial(geometries=polygon, reducer="mean", crs="EPSG:32631") + warnings = [str(w.message) for w in recwarn] + assert "Geometry with non-Lon-Lat CRS 'EPSG:32631' is only supported by specific back-ends." in warnings + assert sorted(masked.graph.keys()) == ["aggregatespatial1", "loadcollection1"] + assert masked.graph["aggregatespatial1"] == { + "process_id": "aggregate_spatial", + "arguments": { + "data": {"from_node": "loadcollection1"}, + "geometries": { + "type": "Polygon", + "coordinates": (((1.0, 0.0), (1.0, 1.0), (0.0, 1.0), (0.0, 0.0), (1.0, 0.0)),), + "crs": {"properties": {"name": "EPSG:32631"}, "type": "name"}, + }, + "reducer": {"process_graph": { + "mean1": {"process_id": "mean", "arguments": {"data": {"from_parameter": "data"}}, "result": True} + }} + }, + "result": True + } + + +def test_mask_polygon_basic(con100: Connection): + img = con100.load_collection("S2") + polygon = shapely.geometry.box(0, 0, 1, 1) + masked = img.mask_polygon(mask=polygon) + assert sorted(masked.graph.keys()) == ["loadcollection1", "maskpolygon1"] + assert masked.graph["maskpolygon1"] == { + "process_id": "mask_polygon", + "arguments": { + "data": {"from_node": "loadcollection1"}, + "mask": { + "type": "Polygon", + "coordinates": (((1.0, 0.0), (1.0, 1.0), (0.0, 1.0), (0.0, 0.0), (1.0, 0.0)),), + } + }, + "result": True + } + + +@pytest.mark.parametrize(["polygon", "expected_mask"], [ + ( + shapely.geometry.box(0, 0, 1, 1), + {"type": "Polygon", "coordinates": (((1.0, 0.0), (1.0, 1.0), (0.0, 1.0), (0.0, 0.0), (1.0, 0.0)),)}, + ), + ( + {"type": "Polygon", "coordinates": (((1, 0), (1, 1), (0, 1), (0, 0), (1, 0)),)}, + {"type": "Polygon", "coordinates": (((1, 0), (1, 1), (0, 1), (0, 0), (1, 0)),)}, + ), + ( + shapely.geometry.MultiPolygon([shapely.geometry.box(0, 0, 1, 1)]), + {"type": "MultiPolygon", "coordinates": [(((1.0, 0.0), (1.0, 1.0), (0.0, 1.0), (0.0, 0.0), (1.0, 0.0)),)]}, + ), + ( + shapely.geometry.GeometryCollection([shapely.geometry.box(0, 0, 1, 1)]), + {"type": "GeometryCollection", "geometries": [ + {"type": "Polygon", "coordinates": (((1.0, 0.0), (1.0, 1.0), (0.0, 1.0), (0.0, 0.0), (1.0, 0.0)),)} + ]}, + ), + ( + { + "type": "Feature", "properties": {}, + "geometry": {"type": "Polygon", "coordinates": (((1, 0), (1, 1), (0, 1), (0, 0), (1, 0)),)}, + }, + { + "type": "Feature", "properties": {}, + "geometry": {"type": "Polygon", "coordinates": (((1, 0), (1, 1), (0, 1), (0, 0), (1, 0)),)}, + }, + ), +]) +def test_mask_polygon_types(con100: Connection, polygon, expected_mask): + img = con100.load_collection("S2") masked = img.mask_polygon(mask=polygon) assert sorted(masked.graph.keys()) == ["loadcollection1", "maskpolygon1"] assert masked.graph["maskpolygon1"] == { "process_id": "mask_polygon", "arguments": { "data": {"from_node": "loadcollection1"}, - 'mask': { - 'coordinates': (((1.0, 0.0), (1.0, 1.0), (0.0, 1.0), (0.0, 0.0), (1.0, 0.0)),), - 'crs': {'properties': {'name': 'EPSG:4326'}, 'type': 'name'}, - 'type': 'Polygon'} + "mask": expected_mask + }, + "result": True + } + + +def test_mask_polygon_with_crs(con100: Connection, recwarn): + img = con100.load_collection("S2") + polygon = shapely.geometry.box(0, 0, 1, 1) + masked = img.mask_polygon(mask=polygon, srs="EPSG:32631") + warnings = [str(w.message) for w in recwarn] + assert "Geometry with non-Lon-Lat CRS 'EPSG:32631' is only supported by specific back-ends." in warnings + assert sorted(masked.graph.keys()) == ["loadcollection1", "maskpolygon1"] + assert masked.graph["maskpolygon1"] == { + "process_id": "mask_polygon", + "arguments": { + "data": {"from_node": "loadcollection1"}, + "mask": { + "type": "Polygon", "coordinates": (((1.0, 0.0), (1.0, 1.0), (0.0, 1.0), (0.0, 0.0), (1.0, 0.0)),), + "crs": {"type": "name", "properties": {"name": "EPSG:32631"}}, + }, }, "result": True }