Skip to content

Commit

Permalink
PR #630 better "weak" format handling in execute_batch (#623, #401, #…
Browse files Browse the repository at this point in the history
…583, #391)

Make it more consistent with `download`
  • Loading branch information
soxofaan committed Sep 25, 2024
1 parent b5587fe commit 6eaa2d8
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 14 deletions.
21 changes: 14 additions & 7 deletions openeo/rest/datacube.py
Original file line number Diff line number Diff line change
Expand Up @@ -2150,7 +2150,9 @@ def download(
:return: None if the result is stored to disk, or a bytes object returned by the backend.
"""
weak_format = guess_format(outputfile) if outputfile else None
cube = self._ensure_save_result(format=format, options=options, weak_format=weak_format, method="Download")
cube = self._ensure_save_result(
format=format, options=options, weak_format=weak_format, method="DataCube.download()"
)
return self._connection.download(cube.flat_graph(), outputfile, validate=validate)

def validate(self) -> List[dict]:
Expand Down Expand Up @@ -2253,7 +2255,7 @@ def execute_batch(
connection_retry_interval: float = 30,
job_options: Optional[dict] = None,
validate: Optional[bool] = None,
# TODO: avoid `format_options` as keyword arguments
# TODO: deprecate `format_options` as keyword arguments
**format_options,
) -> BatchJob:
"""
Expand All @@ -2268,13 +2270,16 @@ def execute_batch(
:param validate: Optional toggle to enable/prevent validation of the process graphs before execution
(overruling the connection's ``auto_validate`` setting).
"""
# TODO: start showing deprecation warnings about these inconsistent argument names
if "format" in format_options and not out_format:
out_format = format_options["format"] # align with 'download' call arg name
if out_format is None and outputfile:
# TODO #401/#449 don't guess/override format if there is already a save_result with format?
out_format = guess_format(outputfile)

job = self.create_job(out_format=out_format, job_options=job_options, validate=validate, **format_options)
weak_format = guess_format(outputfile) if outputfile else None
cube = self._ensure_save_result(
format=out_format, options=format_options, weak_format=weak_format, method="DataCube.execute_batch()"
)

job = cube.create_job(job_options=job_options, validate=validate)
return job.run_synchronous(
outputfile=outputfile,
print=print, max_poll_interval=max_poll_interval, connection_retry_interval=connection_retry_interval
Expand Down Expand Up @@ -2316,7 +2321,9 @@ def create_job(
# TODO: add option to also automatically start the job?
# TODO: avoid using all kwargs as format_options
# TODO: centralize `create_job` for `DataCube`, `VectorCube`, `MlModel`, ...
cube = self._ensure_save_result(format=out_format, options=format_options or None, method="Creating job")
cube = self._ensure_save_result(
format=out_format, options=format_options or None, method="DataCube.create_job()"
)
return self._connection.create_job(
process_graph=cube.flat_graph(),
title=title,
Expand Down
41 changes: 39 additions & 2 deletions tests/rest/datacube/test_datacube.py
Original file line number Diff line number Diff line change
Expand Up @@ -755,7 +755,7 @@ def test_save_result_and_create_job_both_with_format(self, s2cube, save_result_f
with pytest.raises(
OpenEoClientException,
match=re.escape(
"Creating job with explicit output format 'NetCDF',"
"DataCube.create_job() with explicit output format 'NetCDF',"
" but the process graph already has `save_result` node(s)"
" which is ambiguous and should not be combined."
),
Expand Down Expand Up @@ -866,13 +866,50 @@ def test_execute_batch_existing_save_result_incompatible(
with pytest.raises(
OpenEoClientException,
match=re.escape(
"Creating job with explicit output format 'NetCDF',"
"DataCube.execute_batch() with explicit output format 'NetCDF',"
" but the process graph already has `save_result` node(s)"
" which is ambiguous and should not be combined."
),
):
cube.execute_batch(out_format=execute_format)

@pytest.mark.parametrize(
["save_result_format", "execute_output_file", "expected"],
[
(None, None, "GTiff"),
(None, "result.tiff", "GTiff"),
("GTiff", None, "GTiff"),
("GTiff", "result.csv", "GTiff"),
(None, "result.nc", "netCDF"),
("NetCDF", None, "NetCDF"),
("NetCDF", "result.csv", "NetCDF"),
(None, "result.csv", "CSV"),
],
)
def test_save_result_and_execute_batch_weak_format(
self,
s2cube,
get_create_job_pg,
save_result_format,
execute_output_file,
expected,
):
cube = s2cube
if save_result_format:
cube = cube.save_result(format=save_result_format)
cube.execute_batch(outputfile=execute_output_file)
pg = get_create_job_pg()
assert set(pg.keys()) == {"loadcollection1", "saveresult1"}
assert pg["saveresult1"] == {
"process_id": "save_result",
"arguments": {
"data": {"from_node": "loadcollection1"},
"format": expected,
"options": {},
},
"result": True,
}

def test_save_result_format_options_vs_create_job(elf, s2cube, get_create_job_pg):
"""https://github.com/Open-EO/openeo-python-client/issues/433"""
cube = s2cube.save_result(format="GTiff", options={"filename_prefix": "wwt-2023-02"})
Expand Down
10 changes: 5 additions & 5 deletions tests/rest/datacube/test_datacube100.py
Original file line number Diff line number Diff line change
Expand Up @@ -3266,23 +3266,23 @@ def test_apply_append_math_keep_context(con100):
"result.tiff",
{"format": "GTiff"},
OpenEoClientException(
"Download with explicit output format 'GTiff', but the process graph already has `save_result` node(s) which is ambiguous and should not be combined."
"DataCube.download() with explicit output format 'GTiff', but the process graph already has `save_result` node(s) which is ambiguous and should not be combined."
),
),
(
{"format": "netCDF"},
"result.tiff",
{"format": "NETCDF"},
OpenEoClientException(
"Download with explicit output format 'NETCDF', but the process graph already has `save_result` node(s) which is ambiguous and should not be combined."
"DataCube.download() with explicit output format 'NETCDF', but the process graph already has `save_result` node(s) which is ambiguous and should not be combined."
),
),
(
{"format": "netCDF"},
"result.json",
{"format": "JSON"},
OpenEoClientException(
"Download with explicit output format 'JSON', but the process graph already has `save_result` node(s) which is ambiguous and should not be combined."
"DataCube.download() with explicit output format 'JSON', but the process graph already has `save_result` node(s) which is ambiguous and should not be combined."
),
),
({"options": {}}, "result.tiff", {}, b"this is GTiff data"),
Expand All @@ -3291,15 +3291,15 @@ def test_apply_append_math_keep_context(con100):
"result.tiff",
{"options": {"quality": "low"}},
OpenEoClientException(
"Download with explicit output options {'quality': 'low'}, but the process graph already has `save_result` node(s) which is ambiguous and should not be combined."
"DataCube.download() with explicit output options {'quality': 'low'}, but the process graph already has `save_result` node(s) which is ambiguous and should not be combined."
),
),
(
{"options": {"colormap": "jet"}},
"result.tiff",
{"options": {"quality": "low"}},
OpenEoClientException(
"Download with explicit output options {'quality': 'low'}, but the process graph already has `save_result` node(s) which is ambiguous and should not be combined."
"DataCube.download() with explicit output options {'quality': 'low'}, but the process graph already has `save_result` node(s) which is ambiguous and should not be combined."
),
),
],
Expand Down

0 comments on commit 6eaa2d8

Please sign in to comment.