Skip to content

Commit

Permalink
Issue #204: Raises warning when using a CRS for the geometry of `aggr…
Browse files Browse the repository at this point in the history
…egate_spatial` and `mask_polygon`

Harmonize geometry handling in `aggregate_spatial` and `mask_polygon`, including handling of non-standard crs and whitelisting of allowed GeoJSON types
Raises warning when using a CRS for the geometry of `aggregate_spatial` and `mask_polygon`
Also mark this more clearly in the docs

Related: #202, Open-EO/openeo-processes#235, Open-EO/openeo-processes#53
  • Loading branch information
soxofaan committed Apr 22, 2021
1 parent 451695f commit e344c4c
Show file tree
Hide file tree
Showing 5 changed files with 239 additions and 57 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Added

- Allow, but raise warning when specifying a CRS for the geometry passed to `aggregate_spatial` and `mask_polygon`,
which is non-standard/experimental feature, only supported by specific back-ends
([#204](https://github.com/Open-EO/openeo-python-client/issues/204))

### Changed

### Removed
Expand Down
101 changes: 58 additions & 43 deletions openeo/rest/datacube.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ def load_collection(
temporal_extent = cls._get_temporal_extent(extent=temporal_extent)
arguments = {
'id': collection_id,
# TODO: spatial_extent could also be a "geojson" subtype object, so we might want to allow (and convert) shapely shapes as well here.
'spatial_extent': spatial_extent,
'temporal_extent': temporal_extent,
}
Expand Down Expand Up @@ -634,35 +635,63 @@ def _merge_operator_binary_cubes(self, operator: str, other: 'DataCube', left_ar
}
))

def _get_geometry_argument(
self,
geometry: Union[shapely.geometry.base.BaseGeometry, dict, str, pathlib.Path, Parameter],
valid_geojson_types: List[str],
crs: str = None,
) -> Union[dict, Parameter, PGNode]:
"""
Convert input to a geometry as "geojson" subtype object.
"""
if isinstance(geometry, (str, pathlib.Path)):
# Assumption: `geometry` is path to polygon is a path to vector file at backend.
# TODO #104: `read_vector` is non-standard process.
# TODO: If path exists client side: load it client side?
return PGNode(process_id="read_vector", arguments={"filename": str(geometry)})
elif isinstance(geometry, Parameter):
return geometry

if isinstance(geometry, shapely.geometry.base.BaseGeometry):
geometry = mapping(geometry)
if not isinstance(geometry, dict):
raise OpenEoClientException("Invalid geometry argument: {g!r}".format(g=geometry))

if geometry.get("type") not in valid_geojson_types:
raise OpenEoClientException("Invalid geometry type {t!r}, must be one of {s}".format(
t=geometry.get("type"), s=valid_geojson_types
))
if crs:
# TODO: don't warn when the crs is Lon-Lat like EPSG:4326?
warnings.warn("Geometry with non-Lon-Lat CRS {c!r} is only supported by specific back-ends.".format(c=crs))
# TODO #204 alternative for non-standard CRS in GeoJSON object?
geometry["crs"] = {"type": "name", "properties": {"name": crs}}
return geometry

def aggregate_spatial(
self,
geometries: Union[shapely.geometry.base.BaseGeometry, dict, str, pathlib.Path, Parameter],
reducer: Union[str, PGNode, typing.Callable]
reducer: Union[str, PGNode, typing.Callable],
crs: str = None,
# TODO arguments: target dimension, context
) -> 'DataCube':
"""
Aggregates statistics for one or more geometries (e.g. zonal statistics for polygons)
over the spatial dimensions.
:param geometries: shapely geometry, GeoJSON dictionary or path to GeoJSON file
:param reducer: a callback function that creates a process graph, see :ref:`callbackfunctions`
:return:
:param crs: The spatial reference system of the provided polygon.
By default longitude-latitude (EPSG:4326) is assumed.
.. note:: this ``crs`` argument is a non-standard/experimental feature, only supported by specific back-ends.
See https://github.com/Open-EO/openeo-processes/issues/235 for details.
"""
# TODO #125 arguments: target dimension, context
if isinstance(geometries, (str, pathlib.Path)):
# polygon is a path to vector file
# TODO this is non-standard process: check capabilities? #104 #40. Load polygon client side otherwise
geometries = PGNode(process_id="read_vector", arguments={"filename": str(geometries)})
elif isinstance(geometries, shapely.geometry.base.BaseGeometry):
geometries = mapping(geometries)
elif isinstance(geometries, dict) and geometries["type"] in (
# TODO: support more geometry types?
"Polygon", "MultiPolygon", "GeometryCollection", 'FeatureCollection'
):
pass
elif isinstance(geometries, Parameter):
pass
else:
raise OpenEoClientException("Invalid `geometries`: {g!r}".format(g=geometries))
valid_geojson_types = [
"Point", "MultiPoint", "LineString", "MultiLineString",
"Polygon", "MultiPolygon", "GeometryCollection", "FeatureCollection"
]
geometries = self._get_geometry_argument(geometries, valid_geojson_types=valid_geojson_types, crs=crs)
reducer = self._get_callback(reducer, parent_parameters=["data"])
return self.process(process_id="aggregate_spatial", data=THIS, geometries=geometries, reducer=reducer)

Expand Down Expand Up @@ -1111,8 +1140,10 @@ def mask(self, mask: 'DataCube' = None, replacement=None) -> 'DataCube':
)

def mask_polygon(
self, mask: Union[Polygon, MultiPolygon, str, pathlib.Path] = None,
srs="EPSG:4326", replacement=None, inside: bool = None
self,
mask: Union[shapely.geometry.base.BaseGeometry, dict, str, pathlib.Path, Parameter],
srs: str = None,
replacement=None, inside: bool = None
) -> 'DataCube':
"""
Applies a polygon mask to a raster data cube. To apply a raster mask use `mask`.
Expand All @@ -1125,31 +1156,15 @@ def mask_polygon(
which defaults to `no data`.
:param mask: A polygon, provided as a :class:`shapely.geometry.Polygon` or :class:`shapely.geometry.MultiPolygon`, or a filename pointing to a valid vector file
:param srs: The reference system of the provided polygon, by default this is Lat Lon (EPSG:4326).
:param srs: The spatial reference system of the provided polygon.
By default longitude-latitude (EPSG:4326) is assumed.
.. note:: this ``srs`` argument is a non-standard/experimental feature, only supported by specific back-ends.
See https://github.com/Open-EO/openeo-processes/issues/235 for details.
:param replacement: the value to replace the masked pixels with
"""
if isinstance(mask, (str, pathlib.Path)):
# TODO: default to loading file client side?
# TODO: change read_vector to load_uploaded_files https://github.com/Open-EO/openeo-processes/pull/106
read_vector = self.process(
process_id='read_vector',
arguments={'filename': str(mask)}
)
mask = {'from_node': read_vector._pg}
elif isinstance(mask, shapely.geometry.base.BaseGeometry):
if mask.area == 0:
raise ValueError("Mask {m!s} has an area of {a!r}".format(m=mask, a=mask.area))
mask = shapely.geometry.mapping(mask)
mask['crs'] = {
'type': 'name',
'properties': {'name': srs}
}
elif isinstance(mask, Parameter):
pass
else:
# Assume mask is already a valid GeoJSON object
assert "type" in mask

valid_geojson_types = ["Polygon", "MultiPolygon", "GeometryCollection", "Feature", "FeatureCollection"]
mask = self._get_geometry_argument(mask, valid_geojson_types=valid_geojson_types, crs=srs)
return self.process(
process_id="mask_polygon",
arguments=dict_no_none(
Expand Down
10 changes: 3 additions & 7 deletions openeo/rest/imagecollectionclient.py
Original file line number Diff line number Diff line change
Expand Up @@ -763,7 +763,7 @@ def linear_scale_range(self, input_min, input_max, output_min, output_max) -> 'I
}
return self.graph_add_process(process_id, args)

def mask(self, polygon: Union[Polygon, MultiPolygon,str]=None, srs="EPSG:4326", rastermask: 'ImageCollection'=None,
def mask(self, polygon: Union[Polygon, MultiPolygon,str]=None, srs=None, rastermask: 'ImageCollection'=None,
replacement=None) -> 'ImageCollection':
"""
Mask the image collection using either a polygon or a raster mask.
Expand Down Expand Up @@ -805,12 +805,8 @@ def mask(self, polygon: Union[Polygon, MultiPolygon,str]=None, srs="EPSG:4326",
raise ValueError("Mask {m!s} has an area of {a!r}".format(m=polygon, a=polygon.area))

geojson = mapping(polygon)
geojson['crs'] = {
'type': 'name',
'properties': {
'name': srs
}
}
if srs:
geojson['crs'] = {'type': 'name', 'properties': {'name': srs}}
mask = geojson
new_collection = self
elif rastermask is not None:
Expand Down
3 changes: 1 addition & 2 deletions tests/rest/datacube/test_datacube.py
Original file line number Diff line number Diff line change
Expand Up @@ -320,9 +320,8 @@ def test_mask_polygon(s2cube, api_version):
assert graph["arguments"] == {
"data": {'from_node': 'loadcollection1'},
"mask": {
'type': 'Polygon',
'coordinates': (((0.0, 0.0), (1.9, 0.0), (1.9, 1.9), (0.0, 1.9), (0.0, 0.0)),),
'crs': {'properties': {'name': 'EPSG:4326'}, 'type': 'name'},
'type': 'Polygon'
}
}

Expand Down
178 changes: 173 additions & 5 deletions tests/rest/datacube/test_datacube100.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,19 +156,187 @@ def test_filter_bbox_args_and_kwargs_conflict(con100: Connection, args, kwargs,
con100.load_collection("S2").filter_bbox(*args, **kwargs)


def test_mask_polygon(con100: Connection):
def test_aggregate_spatial_basic(con100: Connection):
img = con100.load_collection("S2")
polygon = shapely.geometry.box(0, 0, 1, 1)
masked = img.aggregate_spatial(geometries=polygon, reducer="mean")
assert sorted(masked.graph.keys()) == ["aggregatespatial1", "loadcollection1"]
assert masked.graph["aggregatespatial1"] == {
"process_id": "aggregate_spatial",
"arguments": {
"data": {"from_node": "loadcollection1"},
"geometries": {
"type": "Polygon",
"coordinates": (((1.0, 0.0), (1.0, 1.0), (0.0, 1.0), (0.0, 0.0), (1.0, 0.0)),),
},
"reducer": {"process_graph": {
"mean1": {"process_id": "mean", "arguments": {"data": {"from_parameter": "data"}}, "result": True}
}}
},
"result": True
}


@pytest.mark.parametrize(["polygon", "expected_geometries"], [
(
shapely.geometry.box(0, 0, 1, 1),
{"type": "Polygon", "coordinates": (((1.0, 0.0), (1.0, 1.0), (0.0, 1.0), (0.0, 0.0), (1.0, 0.0)),)},
),
(
{"type": "Polygon", "coordinates": (((1, 0), (1, 1), (0, 1), (0, 0), (1, 0)),)},
{"type": "Polygon", "coordinates": (((1, 0), (1, 1), (0, 1), (0, 0), (1, 0)),)},
),
(
shapely.geometry.MultiPolygon([shapely.geometry.box(0, 0, 1, 1)]),
{"type": "MultiPolygon", "coordinates": [(((1.0, 0.0), (1.0, 1.0), (0.0, 1.0), (0.0, 0.0), (1.0, 0.0)),)]},
),
(
shapely.geometry.GeometryCollection([shapely.geometry.box(0, 0, 1, 1)]),
{"type": "GeometryCollection", "geometries": [
{"type": "Polygon", "coordinates": (((1.0, 0.0), (1.0, 1.0), (0.0, 1.0), (0.0, 0.0), (1.0, 0.0)),)}
]},
),
(
{
"type": "FeatureCollection",
"features": [
{
"type": "Feature", "properties": {},
"geometry": {"type": "Polygon", "coordinates": (((1, 0), (1, 1), (0, 1), (0, 0), (1, 0)),)},
},
]
},
{
"type": "FeatureCollection",
"features": [
{
"type": "Feature", "properties": {},
"geometry": {"type": "Polygon", "coordinates": (((1, 0), (1, 1), (0, 1), (0, 0), (1, 0)),)},
},
]
},
),
])
def test_aggregate_spatial_types(con100: Connection, polygon, expected_geometries):
img = con100.load_collection("S2")
masked = img.aggregate_spatial(geometries=polygon, reducer="mean")
assert sorted(masked.graph.keys()) == ["aggregatespatial1", "loadcollection1"]
assert masked.graph["aggregatespatial1"] == {
"process_id": "aggregate_spatial",
"arguments": {
"data": {"from_node": "loadcollection1"},
"geometries": expected_geometries,
"reducer": {"process_graph": {
"mean1": {"process_id": "mean", "arguments": {"data": {"from_parameter": "data"}}, "result": True}
}}
},
"result": True
}


def test_aggregate_spatial_with_crs(con100: Connection, recwarn):
img = con100.load_collection("S2")
polygon = shapely.geometry.box(0, 0, 1, 1)
masked = img.aggregate_spatial(geometries=polygon, reducer="mean", crs="EPSG:32631")
warnings = [str(w.message) for w in recwarn]
assert "Geometry with non-Lon-Lat CRS 'EPSG:32631' is only supported by specific back-ends." in warnings
assert sorted(masked.graph.keys()) == ["aggregatespatial1", "loadcollection1"]
assert masked.graph["aggregatespatial1"] == {
"process_id": "aggregate_spatial",
"arguments": {
"data": {"from_node": "loadcollection1"},
"geometries": {
"type": "Polygon",
"coordinates": (((1.0, 0.0), (1.0, 1.0), (0.0, 1.0), (0.0, 0.0), (1.0, 0.0)),),
"crs": {"properties": {"name": "EPSG:32631"}, "type": "name"},
},
"reducer": {"process_graph": {
"mean1": {"process_id": "mean", "arguments": {"data": {"from_parameter": "data"}}, "result": True}
}}
},
"result": True
}


def test_mask_polygon_basic(con100: Connection):
img = con100.load_collection("S2")
polygon = shapely.geometry.box(0, 0, 1, 1)
masked = img.mask_polygon(mask=polygon)
assert sorted(masked.graph.keys()) == ["loadcollection1", "maskpolygon1"]
assert masked.graph["maskpolygon1"] == {
"process_id": "mask_polygon",
"arguments": {
"data": {"from_node": "loadcollection1"},
"mask": {
"type": "Polygon",
"coordinates": (((1.0, 0.0), (1.0, 1.0), (0.0, 1.0), (0.0, 0.0), (1.0, 0.0)),),
}
},
"result": True
}


@pytest.mark.parametrize(["polygon", "expected_mask"], [
(
shapely.geometry.box(0, 0, 1, 1),
{"type": "Polygon", "coordinates": (((1.0, 0.0), (1.0, 1.0), (0.0, 1.0), (0.0, 0.0), (1.0, 0.0)),)},
),
(
{"type": "Polygon", "coordinates": (((1, 0), (1, 1), (0, 1), (0, 0), (1, 0)),)},
{"type": "Polygon", "coordinates": (((1, 0), (1, 1), (0, 1), (0, 0), (1, 0)),)},
),
(
shapely.geometry.MultiPolygon([shapely.geometry.box(0, 0, 1, 1)]),
{"type": "MultiPolygon", "coordinates": [(((1.0, 0.0), (1.0, 1.0), (0.0, 1.0), (0.0, 0.0), (1.0, 0.0)),)]},
),
(
shapely.geometry.GeometryCollection([shapely.geometry.box(0, 0, 1, 1)]),
{"type": "GeometryCollection", "geometries": [
{"type": "Polygon", "coordinates": (((1.0, 0.0), (1.0, 1.0), (0.0, 1.0), (0.0, 0.0), (1.0, 0.0)),)}
]},
),
(
{
"type": "Feature", "properties": {},
"geometry": {"type": "Polygon", "coordinates": (((1, 0), (1, 1), (0, 1), (0, 0), (1, 0)),)},
},
{
"type": "Feature", "properties": {},
"geometry": {"type": "Polygon", "coordinates": (((1, 0), (1, 1), (0, 1), (0, 0), (1, 0)),)},
},
),
])
def test_mask_polygon_types(con100: Connection, polygon, expected_mask):
img = con100.load_collection("S2")
masked = img.mask_polygon(mask=polygon)
assert sorted(masked.graph.keys()) == ["loadcollection1", "maskpolygon1"]
assert masked.graph["maskpolygon1"] == {
"process_id": "mask_polygon",
"arguments": {
"data": {"from_node": "loadcollection1"},
'mask': {
'coordinates': (((1.0, 0.0), (1.0, 1.0), (0.0, 1.0), (0.0, 0.0), (1.0, 0.0)),),
'crs': {'properties': {'name': 'EPSG:4326'}, 'type': 'name'},
'type': 'Polygon'}
"mask": expected_mask
},
"result": True
}


def test_mask_polygon_with_crs(con100: Connection, recwarn):
img = con100.load_collection("S2")
polygon = shapely.geometry.box(0, 0, 1, 1)
masked = img.mask_polygon(mask=polygon, srs="EPSG:32631")
warnings = [str(w.message) for w in recwarn]
assert "Geometry with non-Lon-Lat CRS 'EPSG:32631' is only supported by specific back-ends." in warnings
assert sorted(masked.graph.keys()) == ["loadcollection1", "maskpolygon1"]
assert masked.graph["maskpolygon1"] == {
"process_id": "mask_polygon",
"arguments": {
"data": {"from_node": "loadcollection1"},
"mask": {
"type": "Polygon", "coordinates": (((1.0, 0.0), (1.0, 1.0), (0.0, 1.0), (0.0, 0.0), (1.0, 0.0)),),
"crs": {"type": "name", "properties": {"name": "EPSG:32631"}},
},
},
"result": True
}
Expand Down

0 comments on commit e344c4c

Please sign in to comment.