diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8b003954a..77af2d694 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Added
 
+- Added `DataCube.load_stac()` to also support creating a `load_stac` based cube without a connection ([#638](https://github.com/Open-EO/openeo-python-client/issues/638))
+
 ### Changed
 
 ### Removed
 
diff --git a/openeo/rest/connection.py b/openeo/rest/connection.py
index 3e132fc3f..25ba304ac 100644
--- a/openeo/rest/connection.py
+++ b/openeo/rest/connection.py
@@ -44,7 +44,6 @@
     CollectionMetadata,
     SpatialDimension,
     TemporalDimension,
-    metadata_from_stac,
 )
 from openeo.rest import (
     DEFAULT_DOWNLOAD_CHUNK_SIZE,
@@ -1415,26 +1414,14 @@ def load_stac(
             Argument ``temporal_extent``: add support for year/month shorthand notation
             as discussed at :ref:`date-shorthand-handling`.
         """
-        # TODO #425 move this implementation to `DataCube` and just forward here (like with `load_collection`)
-        # TODO #425 detect actual metadata from URL
-        arguments = {"url": url}
-        # TODO #425 more normalization/validation of extent/band parameters
-        if spatial_extent:
-            arguments["spatial_extent"] = spatial_extent
-        if temporal_extent:
-            arguments["temporal_extent"] = DataCube._get_temporal_extent(extent=temporal_extent)
-        if bands:
-            arguments["bands"] = bands
-        if properties:
-            arguments["properties"] = {
-                prop: build_child_callback(pred, parent_parameters=["value"]) for prop, pred in properties.items()
-            }
-        cube = self.datacube_from_process(process_id="load_stac", **arguments)
-        try:
-            cube.metadata = metadata_from_stac(url)
-        except Exception:
-            _log.warning(f"Failed to extract cube metadata from STAC URL {url}", exc_info=True)
-        return cube
+        return DataCube.load_stac(
+            url=url,
+            spatial_extent=spatial_extent,
+            temporal_extent=temporal_extent,
+            bands=bands,
+            properties=properties,
+            connection=self,
+        )
 
     def load_stac_from_job(
         self,
diff --git a/openeo/rest/datacube.py b/openeo/rest/datacube.py
index 06dfb1c92..fe80c79c0 100644
--- a/openeo/rest/datacube.py
+++ b/openeo/rest/datacube.py
@@ -40,6 +40,7 @@
     CollectionMetadata,
     SpatialDimension,
     TemporalDimension,
+    metadata_from_stac,
 )
 from openeo.processes import ProcessBuilder
 from openeo.rest import BandMathException, OpenEoClientException, OperatorException
@@ -84,7 +85,7 @@ class DataCube(_ProcessGraphAbstraction):
     # TODO: set this based on back-end or user preference?
     _DEFAULT_RASTER_FORMAT = "GTiff"
 
-    def __init__(self, graph: PGNode, connection: Connection, metadata: Optional[CollectionMetadata] = None):
+    def __init__(self, graph: PGNode, connection: Optional[Connection], metadata: Optional[CollectionMetadata] = None):
         super().__init__(pgnode=graph, connection=connection)
         self.metadata: Optional[CollectionMetadata] = metadata
 
@@ -260,6 +261,133 @@ def load_disk_collection(cls, connection: Connection, file_format: str, glob_pat
         )
         return cls(graph=pg, connection=connection)
 
+    @classmethod
+    def load_stac(
+        cls,
+        url: str,
+        spatial_extent: Union[Dict[str, float], Parameter, None] = None,
+        temporal_extent: Union[Sequence[InputDate], Parameter, str, None] = None,
+        bands: Optional[List[str]] = None,
+        properties: Optional[Dict[str, Union[str, PGNode, Callable]]] = None,
+        connection: Optional[Connection] = None,
+    ) -> DataCube:
+        """
+        Loads data from a static STAC catalog or a STAC API Collection and returns the data as a processable :py:class:`DataCube`.
+        A batch job result can be loaded by providing a reference to it.
+
+        If supported by the underlying metadata and file format, the data that is added to the data cube can be
+        restricted with the parameters ``spatial_extent``, ``temporal_extent`` and ``bands``.
+        If no data is available for the given extents, a ``NoDataAvailable`` error is thrown.
+
+        Remarks:
+
+        * The bands (and all dimensions that specify nominal dimension labels) are expected to be ordered as
+          specified in the metadata if the ``bands`` parameter is set to ``null``.
+        * If no additional parameter is specified, this implies that the whole data set is expected to be loaded.
+          Due to the large size of many data sets, this is not recommended and may be optimized by back-ends to only
+          load the data that is actually required after evaluating subsequent processes such as filters.
+          This means that the values should be processed only after the data has been limited to the required extent
+          and as a consequence also to a manageable size.
+
+
+        :param url: The URL to a static STAC catalog (STAC Item, STAC Collection, or STAC Catalog)
+            or a specific STAC API Collection that allows filtering items and downloading assets.
+            This includes batch job results, which themselves are compliant with STAC.
+            For external URLs, authentication details such as API keys or tokens may need to be included in the URL.
+
+            Batch job results can be specified in two ways:
+
+            - For batch job results at the same back-end, a URL pointing to the corresponding batch job results
+              endpoint should be provided. The URL usually ends with ``/jobs/{id}/results`` and ``{id}``
+              is the corresponding batch job ID.
+            - For external results, a signed URL must be provided. Not all back-ends support signed URLs,
+              which are provided as a link with the link relation ``canonical`` in the batch job result metadata.
+        :param spatial_extent:
+            Limits the data to load to the specified bounding box or polygons.
+
+            For raster data, the process loads the pixel into the data cube if the point at the pixel center intersects
+            with the bounding box or any of the polygons (as defined in the Simple Features standard by the OGC).
+
+            For vector data, the process loads the geometry into the data cube if the geometry is fully within the
+            bounding box or any of the polygons (as defined in the Simple Features standard by the OGC).
+            Empty geometries may only be in the data cube if no spatial extent has been provided.
+
+            The GeoJSON can be one of the following feature types:
+
+            * A ``Polygon`` or ``MultiPolygon`` geometry,
+            * a ``Feature`` with a ``Polygon`` or ``MultiPolygon`` geometry, or
+            * a ``FeatureCollection`` containing at least one ``Feature`` with ``Polygon`` or ``MultiPolygon`` geometries.
+
+            Set this parameter to ``None`` to set no limit for the spatial extent.
+            Be careful with this when loading large datasets. It is recommended to use this parameter instead of
+            using ``filter_bbox()`` or ``filter_spatial()`` directly after loading unbounded data.
+
+        :param temporal_extent:
+            Limits the data to load to the specified left-closed temporal interval.
+            Applies to all temporal dimensions.
+            The interval has to be specified as an array with exactly two elements:
+
+            1. The first element is the start of the temporal interval.
+               The specified instance in time is **included** in the interval.
+            2. The second element is the end of the temporal interval.
+               The specified instance in time is **excluded** from the interval.
+
+            The second element must always be greater/later than the first element.
+            Otherwise, a ``TemporalExtentEmpty`` exception is thrown.
+
+            Also supports open intervals by setting one of the boundaries to ``None``, but never both.
+
+            Set this parameter to ``None`` to set no limit for the temporal extent.
+            Be careful with this when loading large datasets. It is recommended to use this parameter instead of
+            using ``filter_temporal()`` directly after loading unbounded data.
+
+        :param bands:
+            Only adds the specified bands into the data cube so that bands that don't match the list
+            of band names are not available. Applies to all dimensions of type ``bands``.
+
+            Either the unique band name (metadata field ``name`` in bands) or one of the common band names
+            (metadata field ``common_name`` in bands) can be specified.
+            If the unique band name and the common name conflict, the unique band name has a higher priority.
+
+            The order of the specified array defines the order of the bands in the data cube.
+            If multiple bands match a common name, all matched bands are included in the original order.
+
+            It is recommended to use this parameter instead of using ``filter_bands()`` directly after loading unbounded data.
+
+        :param properties:
+            Limits the data by metadata properties to include only data in the data cube
+            for which all given conditions return ``True`` (AND operation).
+
+            Specify key-value-pairs with the key being the name of the metadata property,
+            which can be retrieved with the openEO Data Discovery for Collections.
+            The value must be a condition (user-defined process) to be evaluated against a STAC API.
+            This parameter is not supported for static STAC.
+
+        :param connection: The connection to use to connect with the back-end.
+
+        .. versionadded:: 0.33.0
+
+        """
+        arguments = {"url": url}
+        # TODO #425 more normalization/validation of extent/band parameters
+        if spatial_extent:
+            arguments["spatial_extent"] = spatial_extent
+        if temporal_extent:
+            arguments["temporal_extent"] = DataCube._get_temporal_extent(extent=temporal_extent)
+        if bands:
+            arguments["bands"] = bands
+        if properties:
+            arguments["properties"] = {
+                prop: build_child_callback(pred, parent_parameters=["value"]) for prop, pred in properties.items()
+            }
+        graph = PGNode("load_stac", arguments=arguments)
+        try:
+            metadata = metadata_from_stac(url)
+        except Exception:
+            log.warning(f"Failed to extract cube metadata from STAC URL {url}", exc_info=True)
+            metadata = None
+        return cls(graph=graph, connection=connection, metadata=metadata)
+
     @classmethod
     def _get_temporal_extent(
         cls,
diff --git a/tests/rest/datacube/test_datacube.py b/tests/rest/datacube/test_datacube.py
index 74a0f3cd3..6b0da6e8b 100644
--- a/tests/rest/datacube/test_datacube.py
+++ b/tests/rest/datacube/test_datacube.py
@@ -83,6 +83,19 @@ def _get_leaf_node(cube, force_flat=True) -> dict:
 
 
 class TestDataCube:
+    def test_load_stac_connectionless(self, connection):
+        expected_graph = {
+            "loadstac1": {
+                "process_id": "load_stac",
+                "arguments": {"url": "https://provider.test/dataset"},
+                "result": True,
+            }
+        }
+        cube = DataCube.load_stac("https://provider.test/dataset")
+        assert cube.flat_graph() == expected_graph
+        cube2 = connection.load_stac("https://provider.test/dataset")
+        assert cube2.flat_graph() == expected_graph
+
     def test_load_collection_connectionless_basic(self):
         cube = DataCube.load_collection("T3")
         assert cube.flat_graph() == {
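For reference, a minimal usage sketch of the connectionless entry point this diff introduces; the STAC and back-end URLs below are hypothetical placeholders:

```python
import openeo
from openeo.rest.datacube import DataCube

# Build a `load_stac` based cube without any back-end connection,
# e.g. for offline process graph construction or testing.
cube = DataCube.load_stac(
    url="https://provider.test/dataset",  # hypothetical STAC URL
    bands=["B02", "B03"],
)
# The process graph can be inspected (or serialized) without a connection.
print(cube.flat_graph())

# The pre-existing, connection-bound path now forwards to the classmethod
# and builds the same graph:
connection = openeo.connect("https://openeo.example")  # hypothetical back-end URL
cube2 = connection.load_stac("https://provider.test/dataset", bands=["B02", "B03"])
assert cube.flat_graph() == cube2.flat_graph()
```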