add $schema OpenAPI references and apply official CWL JSON schema reference #551

Merged
10 commits merged on Sep 5, 2023
6 changes: 5 additions & 1 deletion .pylintrc
@@ -32,7 +32,8 @@ limit-inference-results=100
# usually to register additional checkers.
# https://pylint.pycqa.org/en/latest/technical_reference/extensions.html
load-plugins=pylint.extensions.docparams,
pylint.extensions.mccabe
pylint.extensions.mccabe,
pylint_per_file_ignores

# https://pylint.pycqa.org/en/latest/technical_reference/extensions.html#design-checker-options
max-complexity = 24
@@ -125,6 +126,9 @@ disable=C0111,missing-docstring,

# note: (C0412, ungrouped-imports) is managed via isort tool, ignore false positives

per-file-ignores =
tests/*:R1729

# Enable the message, report, category or checker with the given id(s). You can
# either give multiple identifier separated by comma (,) or put this option
# multiple time (only on the command line, not in the configuration file where
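For context on the new `tests/*:R1729` entry: R1729 is pylint's `use-a-generator` check, which flags list comprehensions passed directly to `all()`/`any()`. The short Python sketch below is illustrative only (it is not part of this diff) and shows what the per-file ignore tolerates under `tests/`:

values = [0, 0, 0]

# R1729 (use-a-generator) would normally flag the list-comprehension form;
# the new per-file ignore silences it for test modules only.
assert all([value == 0 for value in values])

# outside tests/, the generator form keeps the check satisfied
assert all(value == 0 for value in values)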
12 changes: 9 additions & 3 deletions CHANGES.rst
@@ -12,11 +12,17 @@ Changes

Changes:
--------
- No change.
- Add the official `CWL` `JSON` schema reference
(`common-workflow-language/cwl-v1.2#256 <https://github.com/common-workflow-language/cwl-v1.2/pull/256>`_)
as the ``$schema`` parameter returned under the `OpenAPI` schema for the `CWL` component employed by `Weaver`
(fixes `#547 <https://github.com/crim-ca/weaver/issues/547>`_).
- Add ``$schema`` field auto-insertion into the generated `OpenAPI` schema definition by ``CorniceSwagger`` when
corresponding ``colander.SchemaNode`` definitions contain a ``_schema = "<URL>"`` attribute
(fixes `#157 <https://github.com/crim-ca/weaver/issues/157>`_).

Fixes:
------
- No change.
- Fix broken `OpenAPI` schema link references to `OGC API - Processes` repository.

.. _changes_4.30.1:

@@ -725,7 +731,7 @@ Changes:
The previous schema for deployment with nested ``process`` field remains supported for backward compatibility.

.. |ogc-app-pkg| replace:: OGC Application Package
.. _ogc-app-pkg: https://github.com/opengeospatial/ogcapi-processes/blob/master/extensions/deploy_replace_undeploy/standard/openapi/schemas/ogcapppkg.yaml
.. _ogc-app-pkg: https://github.com/opengeospatial/ogcapi-processes/blob/master/openapi/schemas/processes-dru/ogcapppkg.yaml

Fixes:
------
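As a rough illustration of the `_schema` mechanism described in the changelog entry above (names are simplified and the URL is a placeholder; in Weaver the real value is `CWL_SCHEMA_URL` from `weaver/wps_restapi/swagger_definitions.py`), a `colander` schema definition would carry the reference roughly like this:

import colander

# placeholder URL for illustration only; Weaver points this at the official CWL JSON schema
CWL_SCHEMA_URL = "https://example.com/cwl-json-schema.json"

class CWL(colander.MappingSchema):
    # the "_schema" attribute is what the patched CorniceSwagger generation looks for
    # in order to inject a "$schema" entry into the corresponding OpenAPI component
    _schema = CWL_SCHEMA_URL
    cwlVersion = colander.SchemaNode(colander.String(), missing=colander.drop)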
2 changes: 1 addition & 1 deletion README.rst
@@ -331,7 +331,7 @@ It is part of `PAVICS`_ and `Birdhouse`_ ecosystems and is available within the
.. |ogc-api-proc-part2| replace:: `OGC API - Processes - Part 2: Deploy, Replace, Undeploy`_ (DRU) extension
.. _`OGC API - Processes - Part 2: Deploy, Replace, Undeploy`: https://github.com/opengeospatial/ogcapi-processes/tree/master/extensions/deploy_replace_undeploy
.. |ogc-apppkg| replace:: `OGC Application Package`
.. _ogc-apppkg: https://github.com/opengeospatial/ogcapi-processes/blob/master/extensions/deploy_replace_undeploy/standard/openapi/schemas/ogcapppkg.yaml
.. _ogc-apppkg: https://github.com/opengeospatial/ogcapi-processes/blob/master/openapi/schemas/processes-dru/ogcapppkg.yaml
.. _PAVICS: https://ouranosinc.github.io/pavics-sdi/index.html
.. _Birdhouse: http://bird-house.github.io/
.. _birdhouse-deploy: https://github.com/bird-house/birdhouse-deploy
1 change: 1 addition & 0 deletions requirements-dev.txt
@@ -25,6 +25,7 @@ pytest<7
pydocstyle
pylint>=2.11,!=2.12,<2.14; python_version <= "3.6"
pylint>=2.15.4; python_version >= "3.7"
pylint-per-file-ignores; python_version >= "3.7"
pylint_quotes
responses
safety
13 changes: 8 additions & 5 deletions tests/functional/test_cli.py
@@ -194,6 +194,7 @@ def test_processes_with_providers(self):
providers = result.body["providers"]
for prov in providers:
prov.pop("$schema", None)
prov.pop("$id", None)
assert providers == [
{"id": prov1.name, "processes": resources.TEST_EMU_WPS1_PROCESSES},
{"id": prov2.name, "processes": resources.TEST_HUMMINGBIRD_WPS1_PROCESSES},
@@ -379,6 +380,7 @@ def test_describe(self):
output_formats = result.body["outputs"]["output"]["formats"]
for out_fmt in output_formats:
out_fmt.pop("$schema", None)
out_fmt.pop("$id", None)
assert output_formats == [{"default": True, "mediaType": ContentType.TEXT_PLAIN}]
assert "undefined" not in result.message, "CLI should not have confused process description as response detail."
assert result.body["description"] == (
@@ -459,7 +461,7 @@ def test_execute_manual_monitor_status_and_download_results(self):
Test a typical case of :term:`Job` execution, result retrieval and download, but with manual monitoring.

Manual monitoring can be valid in cases where a *very* long :term:`Job` must be executed, and the user does
not intend to wait after it. This avoids leaving some shell/notebook/etc. open of a long time and provide a
not intend to wait for it. This avoids leaving some shell/notebook/etc. open for a long time and providing a
massive ``timeout`` value. Instead, the user can simply re-call :meth:`WeaverClient.monitor` at a later time
to resume monitoring. Another situation can be if the connection was dropped or the script runner crashed, and they
want to pick up monitoring again.
@@ -1198,10 +1200,11 @@ def test_deploy_payload_process_info_merged(self):
out_oas_fmt = {"default": True, "mediaType": ContentType.APP_JSON}
out_any_fmt = [out_cwl_fmt, out_oas_fmt]
# ignore schema specifications so that only the contents are compared
in_schema.pop("$schema", None)
out_schema.pop("$schema", None)
for out_fmt in out_formats:
out_fmt.pop("$schema", None)
for field in ["$id", "$schema"]:
in_schema.pop(field, None)
out_schema.pop(field, None)
for out_fmt in out_formats:
out_fmt.pop(field, None)
# if any of the below definitions don't include user-provided information,
# CLI did not combine it as intended prior to sending deployment request
assert in_schema == in_oas # injected by user provided process description
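The tests in this changeset repeatedly drop `$schema`/`$id` before comparing response bodies. A tiny helper along these lines (hypothetical, not part of the diff) captures the pattern:

SCHEMA_META_FIELDS = ("$schema", "$id")


def strip_schema_meta(data):
    # type: (dict) -> dict
    """Remove JSON-schema metadata fields so assertions compare only the substantive content."""
    for field in SCHEMA_META_FIELDS:
        data.pop(field, None)
    return data


# usage with an invented body, mirroring the output-format comparisons above
out_fmt = {"$schema": "https://example.com/format.json", "default": True, "mediaType": "text/plain"}
assert strip_schema_meta(out_fmt) == {"default": True, "mediaType": "text/plain"}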
14 changes: 12 additions & 2 deletions tests/functional/test_docker_app.py
@@ -13,6 +13,7 @@
from weaver.processes.wps_package import CWL_REQUIREMENT_APP_DOCKER
from weaver.utils import fetch_file, get_any_value, load_file, str2bytes
from weaver.wps.utils import get_wps_url
from weaver.wps_restapi.swagger_definitions import CWL_SCHEMA_URL
from weaver.wps_restapi.utils import get_wps_restapi_base_url


@@ -119,8 +120,17 @@ def test_deployed_process_schemas(self):
# process already deployed by setUpClass
body = self.get_deploy_body()
process = self.process_store.fetch_by_id(self.process_id)
assert process.package == body["executionUnit"][0]["unit"]
assert process.payload == body
assert "$id" in process.package
assert process.package["$id"] == CWL_SCHEMA_URL

payload = process.payload
payload.pop("$schema", None)
payload.pop("$id", None)
package = process.package
package.pop("$schema", None)
package.pop("$id", None)
assert package == body["executionUnit"][0]["unit"]
assert payload == body

def test_execute_wps_rest_resp_json(self):
"""
1 change: 1 addition & 0 deletions tests/test_schemas.py
@@ -127,6 +127,7 @@ def test_format_variations(test_format, expect_format):
try:
result_format = format_schema.deserialize(test_format)
result_format.pop("$schema", None)
result_format.pop("$id", None)
assert result_format == expect_format
except colander.Invalid:
pytest.fail(f"Expected format to be valid: [{test_format}]")
2 changes: 1 addition & 1 deletion tests/test_utils.py
@@ -647,7 +647,7 @@ def mock_sleep(delay):

# since backoff factor multiplies all incrementally increasing delays between requests,
# proper detection of input backoff=0 makes all sleep calls equal to zero
assert all(backoff == 0 for backoff in sleep_counter["called_with"])
assert all([backoff == 0 for backoff in sleep_counter["called_with"]])
assert sleep_counter["called_count"] == 3 # first direct call doesn't have any sleep from retry


7 changes: 7 additions & 0 deletions tests/wps_restapi/test_api.py
@@ -114,6 +114,13 @@ def test_swagger_api_format(self):
assert "openapi" in resp.json
assert "basePath" in resp.json

def test_openapi_includes_schema(self):
resp = self.testapp.get(sd.openapi_json_service.path, headers=self.json_headers)
assert resp.status_code == 200
body = resp.json
assert "$id" in body["components"]["schemas"]["CWL"]
assert body["components"]["schemas"]["CWL"]["$id"] == sd.CWL_SCHEMA_URL

def test_status_unauthorized_and_forbidden(self):
"""
Validates that 401/403 status codes are correctly handled and that the appropriate one is returned.
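Beyond the unit test above, this is roughly what the change enables for API users. The host and path below are placeholders (the real path is whatever `sd.openapi_json_service.path` resolves to), and `requests` is just one possible HTTP client:

import requests

resp = requests.get("https://weaver.example.com/json", timeout=10)
cwl_schema = resp.json()["components"]["schemas"]["CWL"]
# with this change, clients can discover the official CWL JSON-schema reference directly
print(cwl_schema.get("$id"))  # expected to match the CWL_SCHEMA_URL advertised by Weaver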
8 changes: 8 additions & 0 deletions tests/wps_restapi/test_jobs.py
@@ -1494,6 +1494,7 @@ def test_jobs_inputs_outputs_validations(self):

job_none = sd.Execute().deserialize({})
job_none.pop("$schema", None)
job_none.pop("$id", None)
assert job_none == {
"inputs": {},
"outputs": {},
@@ -1503,6 +1504,7 @@

job_in_none = sd.Execute().deserialize({"outputs": {"random": default_trans_mode}})
job_in_none.pop("$schema", None)
job_in_none.pop("$id", None)
assert job_in_none == {
"inputs": {},
"outputs": {"random": default_trans_mode},
@@ -1512,6 +1514,7 @@

job_in_empty_dict = sd.Execute().deserialize({"inputs": {}, "outputs": {"random": default_trans_mode}})
job_in_empty_dict.pop("$schema", None)
job_in_empty_dict.pop("$id", None)
assert job_in_empty_dict == {
"inputs": {},
"outputs": {"random": default_trans_mode},
@@ -1521,6 +1524,7 @@

job_in_empty_list = sd.Execute().deserialize({"inputs": [], "outputs": {"random": default_trans_mode}})
job_in_empty_list.pop("$schema", None)
job_in_empty_list.pop("$id", None)
assert job_in_empty_list == {
"inputs": [],
"outputs": {"random": default_trans_mode},
@@ -1530,6 +1534,7 @@

job_out_none = sd.Execute().deserialize({"inputs": {"random": "ok"}})
job_out_none.pop("$schema", None)
job_out_none.pop("$id", None)
assert job_out_none == {
"inputs": {"random": "ok"},
"outputs": {},
@@ -1539,6 +1544,7 @@

job_out_empty_dict = sd.Execute().deserialize({"inputs": {"random": "ok"}, "outputs": {}})
job_out_empty_dict.pop("$schema", None)
job_out_empty_dict.pop("$id", None)
assert job_out_empty_dict == {
"inputs": {"random": "ok"},
"outputs": {},
@@ -1548,6 +1554,7 @@

job_out_empty_list = sd.Execute().deserialize({"inputs": {"random": "ok"}, "outputs": []})
job_out_empty_list.pop("$schema", None)
job_out_empty_list.pop("$id", None)
assert job_out_empty_list == {
"inputs": {"random": "ok"},
"outputs": [],
@@ -1560,6 +1567,7 @@
"outputs": {"random": {"transmissionMode": ExecuteTransmissionMode.REFERENCE}}
})
job_out_defined.pop("$schema", None)
job_out_defined.pop("$id", None)
assert job_out_defined == {
"inputs": {"random": "ok"},
"outputs": {"random": {"transmissionMode": ExecuteTransmissionMode.REFERENCE}},
4 changes: 4 additions & 0 deletions tests/wps_restapi/test_processes.py
@@ -977,6 +977,10 @@ def test_deploy_process_CWL_DockerRequirement_executionUnit(self):
cwl_out = cwl["outputs"]["output"]
cwl_out["id"] = "output"
cwl["outputs"] = [cwl_out]
cwl.pop("$schema", None)
cwl.pop("$id", None)
pkg.pop("$schema", None)
pkg.pop("$id", None)
assert pkg == cwl

# process description should have been generated with relevant I/O
4 changes: 2 additions & 2 deletions weaver/datatype.py
@@ -2510,9 +2510,9 @@ def offering(self, schema=ProcessSchema.OGC, request=None):
for io_type in ["inputs", "outputs"]:
process[io_type] = normalize_ordered_io(process[io_type], io_hints[io_type])
process.update({"process": dict(process)})
return sd.ProcessDescriptionOLD().deserialize(process)
return sd.ProcessDescriptionOLD(schema_meta_include=True).deserialize(process)
# process fields directly at root + I/O as mappings
return sd.ProcessDescriptionOGC().deserialize(process)
return sd.ProcessDescriptionOGC(schema_meta_include=True).deserialize(process)

def summary(self, revision=False, links=True, container=None):
# type: (bool, bool, Optional[AnySettingsContainer]) -> JSON
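The `schema_meta_include=True` argument added above controls whether the deserialized description carries the `$schema`/`$id` metadata. The sketch below is an assumption about the kind of mechanism involved, not Weaver's actual schema classes, which implement this in their extended colander/cornice base types:

import colander

class SchemaMetaMappingSchema(colander.MappingSchema):
    """Hypothetical base schema: append "$schema" to the deserialized output on request."""
    _schema = None  # subclasses set the JSON-schema URL here

    def __init__(self, *args, schema_meta_include=False, **kwargs):
        super().__init__(*args, **kwargs)
        self.schema_meta_include = schema_meta_include

    def deserialize(self, cstruct=colander.null):
        result = super().deserialize(cstruct)
        if self.schema_meta_include and self._schema and isinstance(result, dict):
            result.setdefault("$schema", self._schema)
        return result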
9 changes: 7 additions & 2 deletions weaver/processes/utils.py
@@ -195,7 +195,7 @@ def _check_deploy(payload):
"""
message = "Process deployment definition is invalid."
try:
results = sd.Deploy().deserialize(payload)
results = sd.Deploy(schema_meta_include=False, schema_include=False).deserialize(payload)
# Because many fields are optional during deployment to allow flexibility between compatible WPS/CWL
# definitions, any invalid field at a lower level could cause a full higher-level definition to be dropped.
# Verify the result to ensure this was not the case for known cases to attempt early detection.
@@ -239,7 +239,10 @@ def _check_deploy(payload):
r_exec_unit = results.get("executionUnit", [{}])
if p_exec_unit and p_exec_unit != r_exec_unit:
message = "Process deployment execution unit is invalid."
d_exec_unit = sd.ExecutionUnitList().deserialize(p_exec_unit) # raises directly if caused by invalid schema
d_exec_unit = sd.ExecutionUnitList(
schema_meta_include=False,
schema_include=False,
).deserialize(p_exec_unit) # raises directly if caused by invalid schema
if r_exec_unit != d_exec_unit: # otherwise raise a generic error, don't allow differing definitions
message = (
"Process deployment execution unit resolved as valid definition but differs from submitted "
@@ -352,6 +355,7 @@ def deploy_process_from_payload(payload, container, overwrite=False):  # pylint:
payload_copy = deepcopy(payload)
payload = _check_deploy(payload)
payload.pop("$schema", None)
payload.pop("$id", None)

# validate identifier naming for unsupported characters
process_desc = payload.get("processDescription", {}) # empty possible if CWL directly passed
@@ -459,6 +463,7 @@ def deploy_process_from_payload(payload, container, overwrite=False):  # pylint:
# remove schema to avoid later deserialization error if different, but remaining content is valid
# also, avoid storing this field in the process object, regenerate it as needed during responses
process_info.pop("$schema", None)
process_info.pop("$id", None)

try:
process = Process(process_info) # if 'version' was provided in deploy info, it will be added as hint here