diff --git a/.github/workflows/integration_tests.yml b/.github/workflows/integration_tests.yml index 94be7b6..1a414d3 100644 --- a/.github/workflows/integration_tests.yml +++ b/.github/workflows/integration_tests.yml @@ -30,6 +30,7 @@ jobs: pip install -e ".[test]" - name: MCP Venue Dev - Integration tests + id: mcp_venue_dev_integration_tests continue-on-error: true env: AIRFLOW_WEBSERVER_PASSWORD: ${{ secrets.MCP_VENUE_DEV_AIRFLOW_WEBSERVER_PASSWORD }} @@ -39,6 +40,7 @@ jobs: --airflow-endpoint=${{ github.event.inputs.MCP_VENUE_DEV_AIRFLOW_ENDPOINT || vars.MCP_VENUE_DEV_AIRFLOW_ENDPOINT }} # - name: MCP Venue Test - Integration tests + # id: mcp_venue_test_integration_tests # continue-on-error: true # run: | # pytest -vv --gherkin-terminal-reporter \ @@ -58,7 +60,7 @@ jobs: exit 1 fi - # Uncomment this block if MCP Venue Test Integration tests are re-enabled + # Uncomment this block when MCP Venue Test Integration tests are re-enabled # if [ "$test_status" != "success" ]; then # echo "MCP Venue Test Integration Tests failed." # exit 1 diff --git a/.github/workflows/smoke_tests.yml b/.github/workflows/smoke_tests.yml index 7006ede..d7326d9 100644 --- a/.github/workflows/smoke_tests.yml +++ b/.github/workflows/smoke_tests.yml @@ -57,16 +57,17 @@ jobs: --airflow-endpoint=${{ github.event.inputs.MCP_VENUE_DEV_AIRFLOW_ENDPOINT || vars.MCP_VENUE_DEV_AIRFLOW_ENDPOINT }} \ --ogc-processes-endpoint=${{ github.event.inputs.MCP_VENUE_DEV_OGC_PROCESSES_ENDPOINT || vars.MCP_VENUE_DEV_OGC_PROCESSES_ENDPOINT }} - - name: MCP Venue Test - Smoke tests - id: mcp_venue_test_smoke_tests - env: - AIRFLOW_WEBSERVER_PASSWORD: ${{ secrets.MCP_VENUE_TEST_AIRFLOW_WEBSERVER_PASSWORD }} - continue-on-error: true - run: | - pytest -vv --gherkin-terminal-reporter \ - unity-test/system/smoke \ - --airflow-endpoint=${{ github.event.inputs.MCP_VENUE_TEST_AIRFLOW_ENDPOINT || vars.MCP_VENUE_TEST_AIRFLOW_ENDPOINT }} \ - --ogc-processes-endpoint=${{ github.event.inputs.MCP_VENUE_TEST_OGC_PROCESSES_ENDPOINT || vars.MCP_VENUE_TEST_OGC_PROCESSES_ENDPOINT }} +# Temporary: comment out checks on MCP venue test until the SPS is redeployed +# - name: MCP Venue Test - Smoke tests +# id: mcp_venue_test_smoke_tests +# env: +# AIRFLOW_WEBSERVER_PASSWORD: ${{ secrets.MCP_VENUE_TEST_AIRFLOW_WEBSERVER_PASSWORD }} +# continue-on-error: true +# run: | +# pytest -vv --gherkin-terminal-reporter \ +# unity-test/system/smoke \ +# --airflow-endpoint=${{ github.event.inputs.MCP_VENUE_TEST_AIRFLOW_ENDPOINT || vars.MCP_VENUE_TEST_AIRFLOW_ENDPOINT }} \ +# --ogc-processes-endpoint=${{ github.event.inputs.MCP_VENUE_TEST_OGC_PROCESSES_ENDPOINT || vars.MCP_VENUE_TEST_OGC_PROCESSES_ENDPOINT }} - name: MCP Venue Ops - Smoke tests id: mcp_venue_ops_smoke_tests @@ -77,8 +78,9 @@ jobs: pytest -vv --gherkin-terminal-reporter \ unity-test/system/smoke/step_defs/test_airflow_api_health.py \ --airflow-endpoint=${{ github.event.inputs.MCP_VENUE_OPS_AIRFLOW_ENDPOINT || vars.MCP_VENUE_OPS_AIRFLOW_ENDPOINT }} \ + --ogc-processes-endpoint=${{ github.event.inputs.MCP_VENUE_OPS_OGC_PROCESSES_ENDPOINT || vars.MCP_VENUE_OPS_OGC_PROCESSES_ENDPOINT }} - - name: MCP Venue SBG Dev - Smoke tests + - name: MCP SBG DEV - Smoke tests id: mcp_sbg_dev_smoke_tests env: AIRFLOW_WEBSERVER_PASSWORD: ${{ secrets.MCP_VENUE_SBG_DEV_AIRFLOW_WEBSERVER_PASSWORD }} @@ -94,22 +96,18 @@ jobs: if: always() run: | dev_status=${{ steps.mcp_venue_dev_smoke_tests.outcome }} - test_status=${{ steps.mcp_venue_test_smoke_tests.outcome }} ops_status=${{ steps.mcp_venue_ops_smoke_tests.outcome }} 
sbg_dev_status=${{ steps.mcp_sbg_dev_smoke_tests.outcome }} echo "Dev Smoke Tests: $dev_status" - echo "Test Smoke Tests: $test_status" echo "Ops Smoke Tests: $ops_status" echo "SBG Dev Smoke Tests: $sbg_dev_status" - if [ "$dev_status" != "success" ] || [ "$test_status" != "success" ] || [ "$ops_status" != "success" ] || [ "$sbg_dev_status" != "success" ]; then + # FIXME: must re-enable [ "$test_status" != "success" ] + if [ "$dev_status" != "success" ] || [ "$ops_status" != "success" ] || [ "$sbg_dev_status" != "success" ]; then echo "One or more smoke tests failed." if [ "$dev_status" != "success" ]; then echo "MCP Venue Dev Smoke Tests failed." fi - if [ "$test_status" != "success" ]; then - echo "MCP Venue Test Smoke Tests failed." - fi if [ "$ops_status" != "success" ]; then echo "MCP Venue Ops Smoke Tests failed." fi diff --git a/airflow/dags/cwl_dag.py b/airflow/dags/cwl_dag.py index 16d47b6..dfb8fbe 100644 --- a/airflow/dags/cwl_dag.py +++ b/airflow/dags/cwl_dag.py @@ -40,18 +40,20 @@ DEFAULT_CWL_ARGUMENTS = json.dumps({"message": "Hello Unity"}) # Alternative arguments to execute SBG Pre-Process -# DEFAULT_CWL_WORKFLOW = "https://raw.githubusercontent.com/unity-sds/sbg-workflows/main/preprocess/sbg-preprocess-workflow.cwl" +# DEFAULT_CWL_WORKFLOW = "https://raw.githubusercontent.com/unity-sds/sbg-workflows/main/preprocess/sbg-preprocess-workflow.cwl" # DEFAULT_CWL_ARGUMENTS = "https://raw.githubusercontent.com/unity-sds/sbg-workflows/main/preprocess/sbg-preprocess-workflow.dev.yml" # Alternative arguments to execute SBG end-to-end -# DEFAULT_CWL_WORKFLOW = "https://raw.githubusercontent.com/unity-sds/sbg-workflows/main/preprocess/sbg-preprocess-workflow.cwl" -# DEFAULT_CWL_ARGUMENTS = json.dumps({"input_processing_labels": ["label1", "label2"], "input_cmr_stac": "https://cmr.earthdata.nasa.gov/search/granules.stac?collection_concept_id=C2408009906-LPCLOUD&temporal[]=2023-08-10T03:41:03.000Z,2023-08-10T03:41:03.000Z", "input_unity_dapa_client": "40c2s0ulbhp9i0fmaph3su9jch", "input_unity_dapa_api": "https://d3vc8w9zcq658.cloudfront.net", "input_crid": "001", "output_collection_id": "urn:nasa:unity:unity:dev:SBG-L1B_PRE___1", "output_data_bucket": "sps-dev-ds-storage"}) +# DEFAULT_CWL_WORKFLOW = "https://raw.githubusercontent.com/unity-sds/sbg-workflows/main/L1-to-L2-e2e.cwl" +# DEFAULT_CWL_ARGUMENTS = "https://raw.githubusercontent.com/unity-sds/sbg-workflows/main/L1-to-L2-e2e.dev.yml" +# Alternative arguments to execute SBG end-to-end +# unity_sps_sbg_debug.txt CONTAINER_RESOURCES = k8s.V1ResourceRequirements( requests={ # "cpu": "2660m", # 2.67 vCPUs, specified in milliCPUs # "memory": "22Gi", # Rounded to 22 GiB for easier specification - "ephemeral-storage": "30Gi" + "ephemeral-storage": "10Gi" }, # limits={ # # "cpu": "2660m", # Optional: set the same as requests if you want a fixed allocation @@ -120,7 +122,7 @@ def setup(ti=None, **context): startup_timeout_seconds=1800, arguments=["{{ params.cwl_workflow }}", "{{ params.cwl_args }}"], container_security_context={"privileged": True}, - # container_resources=CONTAINER_RESOURCES, + container_resources=CONTAINER_RESOURCES, container_logs=True, volume_mounts=[ k8s.V1VolumeMount(name="workers-volume", mount_path=WORKING_DIR, sub_path="{{ dag_run.run_id }}") diff --git a/airflow/dags/sbg_L1_to_L2_e2e_cwl_dag.py b/airflow/dags/sbg_L1_to_L2_e2e_cwl_dag.py index 4f2803c..d0f1901 100644 --- a/airflow/dags/sbg_L1_to_L2_e2e_cwl_dag.py +++ b/airflow/dags/sbg_L1_to_L2_e2e_cwl_dag.py @@ -33,7 +33,7 @@ INPUT_AUX_STAC = 
'{"numberMatched":{"total_size":1},"numberReturned":1,"stac_version":"1.0.0","type":"FeatureCollection","links":[{"rel":"self","href":"https://d3vc8w9zcq658.cloudfront.net/am-uds-dapa/collections/urn:nasa:unity:unity:dev:SBG-L1B_PRE___1/items?limit=10"},{"rel":"root","href":"https://d3vc8w9zcq658.cloudfront.net"}],"features":[{"type":"Feature","stac_version":"1.0.0","id":"urn:nasa:unity:unity:dev:SBG-AUX___1:sRTMnet_v120","properties":{"datetime":"2024-02-14T22:04:41.078000Z","start_datetime":"2024-01-03T13:19:36Z","end_datetime":"2024-01-03T13:19:48Z","created":"2024-01-03T13:19:36Z","updated":"2024-02-14T22:05:25.248000Z","status":"completed","provider":"unity"},"geometry":{"type":"Point","coordinates":[0,0]},"links":[{"rel":"collection","href":"."}],"assets":{"sRTMnet_v120.h5":{"href":"s3://sps-dev-ds-storage/urn:nasa:unity:unity:dev:SBG-AUX___1/urn:nasa:unity:unity:dev:SBG-AUX___1:sRTMnet_v120.h5/sRTMnet_v120.h5","title":"sRTMnet_v120.h5","description":"size=-1;checksumType=md5;checksum=unknown;","roles":["data"]},"sRTMnet_v120_aux.npz":{"href":"s3://sps-dev-ds-storage/urn:nasa:unity:unity:dev:SBG-AUX___1/urn:nasa:unity:unity:dev:SBG-AUX___1:sRTMnet_v120.h5/sRTMnet_v120_aux.npz","title":"sRTMnet_v120_aux.npz","description":"size=-1;checksumType=md5;checksum=unknown;","roles":["data"]}},"bbox":[-180,-90,180,90],"stac_extensions":[],"collection":"urn:nasa:unity:unity:dev:SBG-AUX___1"}]}' dag = DAG( - dag_id="sbg-l1-to-l2-e2e-cwl-dag", + dag_id="sbg_L1_to_L2_e2e_cwl_dag", description="SBG L1 to L2 End-To-End Workflow as CWL", tags=["SBG", "Unity", "SPS", "NASA", "JPL"], is_paused_upon_creation=False, diff --git a/airflow/dags/sbg_L1_to_L2_e2e_cwl_step_by_step_dag.py b/airflow/dags/sbg_L1_to_L2_e2e_cwl_step_by_step_dag.py index d64f021..4949917 100644 --- a/airflow/dags/sbg_L1_to_L2_e2e_cwl_step_by_step_dag.py +++ b/airflow/dags/sbg_L1_to_L2_e2e_cwl_step_by_step_dag.py @@ -365,7 +365,7 @@ def setup(ti=None, **context): "https://raw.githubusercontent.com/unity-sds/sbg-workflows/main/preprocess/sbg-preprocess-workflow.cwl" ) preprocess_task = KubernetesPodOperator( - retries=5, + retries=0, task_id="SBG_Preprocess", namespace=POD_NAMESPACE, name="sbg-preprocess-pod", @@ -373,7 +373,7 @@ def setup(ti=None, **context): service_account_name="airflow-worker", in_cluster=True, get_logs=True, - startup_timeout_seconds=1200, + startup_timeout_seconds=1800, arguments=[SBG_PREPROCESS_CWL, "{{ti.xcom_pull(task_ids='Setup', key='preprocess_args')}}"], container_security_context={"privileged": True}, container_resources=CONTAINER_RESOURCES, @@ -403,7 +403,7 @@ def setup(ti=None, **context): ) isofit_task = KubernetesPodOperator( # wait_until_job_complete=True, - retries=5, + retries=0, task_id="SBG_Isofit", namespace=POD_NAMESPACE, name="sbg-isofit", @@ -411,7 +411,7 @@ def setup(ti=None, **context): service_account_name="airflow-worker", in_cluster=True, get_logs=True, - startup_timeout_seconds=1200, + startup_timeout_seconds=1800, arguments=[SBG_ISOFIT_CWL, "{{ti.xcom_pull(task_ids='Setup', key='isofit_args')}}"], container_security_context={"privileged": True}, container_resources=CONTAINER_RESOURCES, @@ -441,7 +441,7 @@ def setup(ti=None, **context): ) resample_task = KubernetesPodOperator( # wait_until_job_complete=True,=True, - retries=5, + retries=0, task_id="SBG_Resample", namespace=POD_NAMESPACE, name="sbg-resample-pod", @@ -449,7 +449,7 @@ def setup(ti=None, **context): service_account_name="airflow-worker", in_cluster=True, get_logs=True, - startup_timeout_seconds=1200, + 
startup_timeout_seconds=1800, arguments=[SBG_RESAMPLE_CWL, "{{ti.xcom_pull(task_ids='Setup', key='resample_args')}}"], container_security_context={"privileged": True}, container_resources=CONTAINER_RESOURCES, @@ -477,7 +477,7 @@ def setup(ti=None, **context): SBG_REFLECT_CORRECT_CWL = "https://raw.githubusercontent.com/unity-sds/sbg-workflows/main/reflect-correct/sbg-reflect-correct-workflow.cwl" reflect_correct_task = KubernetesPodOperator( # wait_until_job_complete=True,=True, - retries=5, + retries=0, task_id="SBG_Reflect", namespace=POD_NAMESPACE, name="sbg-reflect-pod", @@ -485,7 +485,7 @@ def setup(ti=None, **context): service_account_name="airflow-worker", in_cluster=True, get_logs=True, - startup_timeout_seconds=1200, + startup_timeout_seconds=1800, arguments=[SBG_REFLECT_CORRECT_CWL, "{{ti.xcom_pull(task_ids='Setup', key='reflect_correct_args')}}"], container_security_context={"privileged": True}, container_resources=CONTAINER_RESOURCES, @@ -515,7 +515,7 @@ def setup(ti=None, **context): ) frcover_task = KubernetesPodOperator( # wait_until_job_complete=True,=True, - retries=5, + retries=0, task_id="SBG_Frcover", namespace=POD_NAMESPACE, name="sbg-frcover-pod", @@ -523,7 +523,7 @@ def setup(ti=None, **context): service_account_name="airflow-worker", in_cluster=True, get_logs=True, - startup_timeout_seconds=1200, + startup_timeout_seconds=1800, arguments=[SBG_FRCOVER_CWL, "{{ti.xcom_pull(task_ids='Setup', key='frcover_args')}}"], container_security_context={"privileged": True}, container_resources=CONTAINER_RESOURCES, diff --git a/airflow/dags/sbg_preprocess_cwl_dag.py b/airflow/dags/sbg_preprocess_cwl_dag.py index 6a18dda..7002af7 100644 --- a/airflow/dags/sbg_preprocess_cwl_dag.py +++ b/airflow/dags/sbg_preprocess_cwl_dag.py @@ -1,24 +1,23 @@ # DAG for executing the SBG Preprocess Workflow # See https://github.com/unity-sds/sbg-workflows/blob/main/preprocess/sbg-preprocess-workflow.cwl -import json +import logging import os import shutil -import uuid from datetime import datetime from airflow.models.param import Param from airflow.operators.python import PythonOperator, get_current_context from airflow.providers.cncf.kubernetes.operators.pod import KubernetesPodOperator +from airflow.utils.trigger_rule import TriggerRule from kubernetes.client import models as k8s +from unity_sps_utils import get_affinity from airflow import DAG -# The Kubernetes Pod that executes the CWL-Docker container -# Must use elevated privileges to start/stop the Docker engine -POD_TEMPLATE_FILE = "/opt/airflow/dags/docker_cwl_pod.yaml" - # The Kubernetes namespace within which the Pod is run (it must already exist) POD_NAMESPACE = "airflow" +POD_LABEL = "sbg_preprocess_task" +SPS_DOCKER_CWL_IMAGE = "ghcr.io/unity-sds/unity-sps/sps-docker-cwl:2.1.0" # The path of the working directory where the CWL workflow is executed # (aka the starting directory for cwl-runner). 
@@ -31,13 +30,21 @@ "depends_on_past": False, "start_date": datetime.utcfromtimestamp(0), } -CWL_WORKFLOW = ( +# Alternative arguments to execute SBG Pre-Process +DEFAULT_CWL_WORKFLOW = ( "https://raw.githubusercontent.com/unity-sds/sbg-workflows/main/preprocess/sbg-preprocess-workflow.cwl" ) -CMR_STAC = "https://cmr.earthdata.nasa.gov/search/granules.stac?collection_concept_id=C2408009906-LPCLOUD&temporal[]=2023-08-10T03:41:03.000Z,2023-08-10T03:41:03.000Z" +DEFAULT_CWL_ARGUMENTS = "https://raw.githubusercontent.com/unity-sds/sbg-workflows/main/preprocess/sbg-preprocess-workflow.dev.yml" +# CMR_STAC = "https://cmr.earthdata.nasa.gov/search/granules.stac?collection_concept_id=C2408009906-LPCLOUD&temporal[]=2023-08-10T03:41:03.000Z,2023-08-10T03:41:03.000Z" + +# common parameters +CONTAINER_RESOURCES = k8s.V1ResourceRequirements( + requests={"ephemeral-storage": "5Gi"}, +) +INPUT_PROCESSING_LABELS = ["SBG", "CWL"] dag = DAG( - dag_id="sbg-preprocess-cwl-dag", + dag_id="sbg_preprocess_cwl_dag", description="SBG Preprocess Workflow as CWL", tags=["SBG", "Unity", "SPS", "NASA", "JPL"], is_paused_upon_creation=False, @@ -46,52 +53,52 @@ max_active_runs=100, default_args=dag_default_args, params={ - "cwl_workflow": Param(CWL_WORKFLOW, type="string"), - "input_cmr_stac": Param(CMR_STAC, type="string"), - "input_unity_dapa_api": Param("https://d3vc8w9zcq658.cloudfront.net", type="string"), - "input_unity_dapa_client": Param("40c2s0ulbhp9i0fmaph3su9jch", type="string"), - "input_crid": Param("001", type="string"), - "output_collection_id": Param("urn:nasa:unity:unity:dev:SBG-L1B_PRE___1", type="string"), - "output_data_bucket": Param("sps-dev-ds-storage", type="string"), + "cwl_workflow": Param( + DEFAULT_CWL_WORKFLOW, + type="string", + title="CWL workflow", + description="The SBG Pre-process CWL workflow URL", + ), + "cwl_args": Param( + DEFAULT_CWL_ARGUMENTS, + type="string", + title="CWL workflow parameters", + description="The SBG Pre-process YAML parameters URL", + ), }, ) -# Task that serializes the job arguments into a JSON string def setup(ti=None, **context): + """ + Task that creates the working directory on the shared volume. 
+ """ context = get_current_context() dag_run_id = context["dag_run"].run_id - local_dir = os.path.dirname(f"/shared-task-data/{dag_run_id}") + local_dir = f"/shared-task-data/{dag_run_id}" + logging.info(f"Creating directory: {local_dir}") os.makedirs(local_dir, exist_ok=True) - - task_dict = { - "input_processing_labels": ["label1", "label2"], - "input_cmr_stac": context["params"]["input_cmr_stac"], - "input_unity_dapa_api": context["params"]["input_unity_dapa_api"], - "input_unity_dapa_client": context["params"]["input_unity_dapa_client"], - "input_crid": context["params"]["input_crid"], - "output_collection_id": context["params"]["output_collection_id"], - "output_data_bucket": context["params"]["output_data_bucket"], - } - ti.xcom_push(key="cwl_args", value=json.dumps(task_dict)) + logging.info(f"Created directory: {local_dir}") setup_task = PythonOperator(task_id="Setup", python_callable=setup, dag=dag) -# Task that executes the specific CWL workflow with the previous arguments +# Task that executes the SBG Preprocess CWL workflow cwl_task = KubernetesPodOperator( + retries=0, + task_id="sbg_preprocess_task", namespace=POD_NAMESPACE, - name="SBG_Preprocess_CWL", - on_finish_action="delete_pod", - hostnetwork=False, - startup_timeout_seconds=1000, + name="sbg-preprocess-pod", + image=SPS_DOCKER_CWL_IMAGE, + service_account_name="airflow-worker", + in_cluster=True, get_logs=True, - task_id="SBG_Preprocess_CWL", - full_pod_spec=k8s.V1Pod(k8s.V1ObjectMeta(name=("sbg-preprocess-cwl-pod-" + uuid.uuid4().hex))), - pod_template_file=POD_TEMPLATE_FILE, - arguments=["{{ params.cwl_workflow }}", "{{ti.xcom_pull(task_ids='Setup', key='cwl_args')}}"], - dag=dag, + startup_timeout_seconds=1800, + arguments=["{{ params.cwl_workflow }}", "{{ params.cwl_args }}"], + container_security_context={"privileged": True}, + container_resources=CONTAINER_RESOURCES, + container_logs=True, volume_mounts=[ k8s.V1VolumeMount(name="workers-volume", mount_path=WORKING_DIR, sub_path="{{ dag_run.run_id }}") ], @@ -101,23 +108,36 @@ def setup(ti=None, **context): persistent_volume_claim=k8s.V1PersistentVolumeClaimVolumeSource(claim_name="airflow-kpo"), ) ], + dag=dag, + node_selector={"karpenter.sh/nodepool": "airflow-kubernetes-pod-operator"}, + labels={"app": POD_LABEL}, + annotations={"karpenter.sh/do-not-disrupt": "true"}, + affinity=get_affinity( + capacity_type=["spot"], + instance_type=["r7i.xlarge"], + anti_affinity_label=POD_LABEL, + ), + on_finish_action="keep_pod", + is_delete_operator_pod=False, ) def cleanup(**context): + """ + Task that deletes all data shared between Tasks + from the Kubernetes PersistentVolume + """ dag_run_id = context["dag_run"].run_id local_dir = f"/shared-task-data/{dag_run_id}" if os.path.exists(local_dir): shutil.rmtree(local_dir) - print(f"Deleted directory: {local_dir}") + logging.info(f"Deleted directory: {local_dir}") else: - print(f"Directory does not exist, no need to delete: {local_dir}") + logging.info(f"Directory does not exist, no need to delete: {local_dir}") cleanup_task = PythonOperator( - task_id="Cleanup", - python_callable=cleanup, - dag=dag, + task_id="Cleanup", python_callable=cleanup, dag=dag, trigger_rule=TriggerRule.ALL_DONE ) diff --git a/terraform-unity/modules/terraform-unity-sps-eks/README.md b/terraform-unity/modules/terraform-unity-sps-eks/README.md index de7baf5..d892f02 100644 --- a/terraform-unity/modules/terraform-unity-sps-eks/README.md +++ b/terraform-unity/modules/terraform-unity-sps-eks/README.md @@ -38,7 +38,7 @@ |
[deployment\_name](#input\_deployment\_name) | The name of the deployment. | `string` | n/a | yes | | [nodegroups](#input\_nodegroups) | A map of node group configurations |
map(object({
create_iam_role = optional(bool)
iam_role_arn = optional(string)
ami_id = optional(string)
min_size = optional(number)
max_size = optional(number)
desired_size = optional(number)
instance_types = optional(list(string))
capacity_type = optional(string)
enable_bootstrap_user_data = optional(bool)
metadata_options = optional(map(any))
block_device_mappings = optional(map(object({
device_name = string
ebs = object({
volume_size = number
volume_type = string
encrypted = bool
delete_on_termination = bool
})
})))
}))
|
{
"defaultGroup": {
"block_device_mappings": {
"xvda": {
"device_name": "/dev/xvda",
"ebs": {
"delete_on_termination": true,
"encrypted": true,
"volume_size": 100,
"volume_type": "gp2"
}
}
},
"desired_size": 1,
"instance_types": [
"t3.large"
],
"max_size": 1,
"metadata_options": {
"http_endpoint": "enabled",
"http_put_response_hop_limit": 3
},
"min_size": 1
}
}
| no | | [project](#input\_project) | The project or mission deploying Unity SPS | `string` | `"unity"` | no | -| [release](#input\_release) | The software release version. | `string` | `"2.0.1"` | no | +| [release](#input\_release) | The software release version. | `string` | `"24.2"` | no | | [service\_area](#input\_service\_area) | The service area owner of the resources being deployed | `string` | `"sps"` | no | | [venue](#input\_venue) | The MCP venue in which the cluster will be deployed (dev, test, prod) | `string` | n/a | yes | diff --git a/terraform-unity/modules/terraform-unity-sps-eks/variables.tf b/terraform-unity/modules/terraform-unity-sps-eks/variables.tf index e20d9bf..cf42ba3 100644 --- a/terraform-unity/modules/terraform-unity-sps-eks/variables.tf +++ b/terraform-unity/modules/terraform-unity-sps-eks/variables.tf @@ -28,7 +28,7 @@ variable "counter" { variable "release" { description = "The software release version." type = string - default = "2.0.1" + default = "24.2" } variable "nodegroups" { diff --git a/terraform-unity/modules/terraform-unity-sps-karpenter/variables.tf b/terraform-unity/modules/terraform-unity-sps-karpenter/variables.tf index 982cfef..e8570c3 100644 --- a/terraform-unity/modules/terraform-unity-sps-karpenter/variables.tf +++ b/terraform-unity/modules/terraform-unity-sps-karpenter/variables.tf @@ -28,7 +28,7 @@ variable "counter" { variable "release" { description = "The software release version." type = string - default = "2.0.1" + default = "24.2" } variable "helm_charts" { diff --git a/unity-test/system/integration/features/airflow_sbg_e2e.feature b/unity-test/system/integration/features/airflow_sbg_e2e.feature deleted file mode 100644 index 406c474..0000000 --- a/unity-test/system/integration/features/airflow_sbg_e2e.feature +++ /dev/null @@ -1,11 +0,0 @@ -Feature: Airflow SBG E2E processing check - - As an SBG user - I want to ensure that SBG E2E processing does not fail - So that I can get data from it - - Scenario: Check SBG E2E processing - Given the Airflow API is up and running - When I trigger a dag run for the SBG E2E dag - Then I receive a response with status code 200 - And I see an eventual successful dag run diff --git a/unity-test/system/integration/features/sbg_preprocess_workflow.feature b/unity-test/system/integration/features/sbg_preprocess_workflow.feature new file mode 100644 index 0000000..b6969e5 --- /dev/null +++ b/unity-test/system/integration/features/sbg_preprocess_workflow.feature @@ -0,0 +1,11 @@ +Feature: Airflow SBG Preprocess Workflow + + As an SBG user + I want to ensure that SBG Preprocess workflow does not fail + So that I can get data from it + + Scenario: Check SBG Preprocess Workflow + Given the Airflow API is up and running + When I trigger a dag run for the SBG Preprocess dag + Then I receive a response with status code 200 + And I see an eventual successful dag run diff --git a/unity-test/system/integration/step_defs/test_airflow_sbg_e2e.py b/unity-test/system/integration/step_defs/test_sbg_preprocess_workflow.py similarity index 83% rename from unity-test/system/integration/step_defs/test_airflow_sbg_e2e.py rename to unity-test/system/integration/step_defs/test_sbg_preprocess_workflow.py index 0d53623..9b38878 100644 --- a/unity-test/system/integration/step_defs/test_airflow_sbg_e2e.py +++ b/unity-test/system/integration/step_defs/test_sbg_preprocess_workflow.py @@ -6,11 +6,11 @@ FILE_PATH = Path(__file__) FEATURES_DIR = FILE_PATH.parent.parent / "features" -FEATURE_FILE = FEATURES_DIR / 
"airflow_sbg_e2e.feature" +FEATURE_FILE: Path = FEATURES_DIR / "sbg_preprocess_workflow.feature" -@scenario(FEATURE_FILE, "Check SBG E2E processing") -def test_check_sbg_e2e(): +@scenario(FEATURE_FILE, "Check SBG Preprocess Workflow") +def test_check_sbg_preprocess_workflow(): pass @@ -19,12 +19,12 @@ def api_up_and_running(): pass -@when("I trigger a dag run for the SBG E2E dag", target_fixture="response") +@when("I trigger a dag run for the SBG Preprocess dag", target_fixture="response") def trigger_dag(airflow_api_url, airflow_api_auth): # leaving out dag_run_id to avoid conflicts with previous runs- we can always fetch it from the response # unsure about contents of the conf argument, though response = requests.post( - f"{airflow_api_url}/api/v1/dags/sbg-l1-to-l2-e2e-cwl-step-by-step-dag/dagRuns", + f"{airflow_api_url}/api/v1/dags/sbg_preprocess_cwl_dag/dagRuns", auth=airflow_api_auth, json={"note": "Triggered by unity-test suite"}, ) diff --git a/utils/post_deployment.sh b/utils/post_deployment.sh new file mode 100755 index 0000000..7013c67 --- /dev/null +++ b/utils/post_deployment.sh @@ -0,0 +1,29 @@ +#!/bin/bash +#set -ex + +# Script to execute post-deployment operations +# Pre-Requisites: +# - SPS has been deployed successfully +# - AWS credentials are renewed and set in the environment +# Syntax: +# ./post_deployment.sh +# Example: +# ./post_deployment.sh http://k8s-airflow-ogcproce-944e409e1d-687289935.us-west-2.elb.amazonaws.com:5001 + +# script argument: the $WPST_API +export WPST_API=$1 + +# list of processes to be registered +declare -a procs=("cwl_dag" "karpenter_test" "sbg_L1_to_L2_e2e_cwl_step_by_step_dag") + +for proc in "${procs[@]}" +do + echo " " + # register process + echo "Registering process: $proc" + curl -X POST -H "Content-Type: application/json; charset=utf-8" --data '{"id":"'${proc}'", "version": "1.0.0"}' "${WPST_API}/processes" + # unregister process + # echo "Unregistering process: $proc" + # curl -X DELETE -H "Content-Type: application/json; charset=utf-8" "${WPST_API}/processes/${proc}" + echo " " +done diff --git a/utils/unity_sps_deploy_or_destroy_sps.sh b/utils/unity_sps_deploy_or_destroy_sps.sh new file mode 100755 index 0000000..ca12516 --- /dev/null +++ b/utils/unity_sps_deploy_or_destroy_sps.sh @@ -0,0 +1,99 @@ +#!/bin/bash +set -ex +# Script to deploy or destroy the Unity EKS, Karpenter of Airflow +# +# Syntax: ./unity_sps_deploy_or_destroy_sps.sh deploy|destroy eks|karpenter|airflow +# +# Pre-requisites: +# o Customize all parameters in the header section to target your desired deployment +# o Renew the proper AWS credentials +# +# Note: +# Components must be deployed in this order: +# deploy eks > karpenter > airflow +# Components must be destroyed in the revers order: +# destroy airflow > karpenter > eks + +# Note: +# Must make sure we don't check in a new version of this script with a real AWS account number + +# =============== START HEADER: customize this section ================= + +# set venue dependent AWS profile +export AWS_PROFILE=XXXXXXXXXXXX_mcp-tenantOperator +export AWS_REGION=us-west-2 + +# set cluster parameters +export PROJECT=unity +export SERVICE_AREA=sps +export VENUE=dev +export DEPLOYMENT=luca +export COUNTER=7 +export BUCKET=unity-unity-dev-bucket + +# the root directory of the "unity-sps" installation +export UNITY_SPS_DIR=/Users/cinquini/PycharmProjects/unity-sps + +# ============= END HEADER: do not change what follows ==================== + +# "deploy" or "destroy" +ACTION=$1 + +# "eks" or "karpenter" or "airflow" 
+COMPONENT=$2 + +export CLUSTER_NAME=unity-${VENUE}-sps-eks-${DEPLOYMENT}-${COUNTER} +export KUBECONFIG=${UNITY_SPS_DIR}/terraform-unity/modules/terraform-unity-sps-eks/$CLUSTER_NAME.cfg + +if [ "$COMPONENT" = "eks" ]; then + tf_dir=${UNITY_SPS_DIR}/terraform-unity/modules/terraform-unity-sps-eks +elif [ "$COMPONENT" = "karpenter" ]; then + tf_dir=${UNITY_SPS_DIR}/terraform-unity/modules/terraform-unity-sps-karpenter +elif [ "$COMPONENT" = "airflow" ]; then + tf_dir=${UNITY_SPS_DIR}/terraform-unity +fi +export TFVARS_FILENAME=unity-${VENUE}-sps-${COMPONENT}-${DEPLOYMENT}-${COUNTER}.tfvars + +# initialize Terraform +cd $tf_dir +tfswitch 1.8.2 +export KEY=sps/tfstates/${PROJECT}-${VENUE}-${SERVICE_AREA}-${COMPONENT}-${DEPLOYMENT}-${COUNTER}.tfstate +terraform init -reconfigure -backend-config="bucket=$BUCKET" -backend-config="key=$KEY" +terraform get -update + +# if new cluster --> create new tfvars file +mkdir -p tfvars +if ! [ -f tfvars/${TFVARS_FILENAME} ]; then + terraform-docs tfvars hcl . --output-file "tfvars/${TFVARS_FILENAME}" +fi + +# switch between COMPONENTs +if [ "$COMPONENT" = "eks" ]; then + if [ "$ACTION" = "deploy" ]; then + echo "Deploying $COMPONENT" + terraform apply --auto-approve --var-file=tfvars/$TFVARS_FILENAME + aws eks update-kubeconfig --region us-west-2 --name $CLUSTER_NAME --kubeconfig ${KUBECONFIG} + kubectl get all -A + elif [ "$ACTION" = "destroy" ]; then + echo "Destroying $COMPONENT" + terraform destroy --auto-approve --var-file=tfvars/$TFVARS_FILENAME + fi +elif [ "$COMPONENT" = "airflow" ]; then + if [ "$ACTION" = "deploy" ]; then + echo "Deploying $COMPONENT" + terraform apply --auto-approve --var-file=tfvars/$TFVARS_FILENAME + kubectl get all -n airflow + elif [ "$ACTION" = "destroy" ]; then + echo "Destroying $COMPONENT" + terraform destroy --auto-approve --var-file=tfvars/$TFVARS_FILENAME + fi +elif [ "$COMPONENT" = "karpenter" ]; then + if [ "$ACTION" = "deploy" ]; then + echo "Deploying $COMPONENT" + terraform apply --auto-approve --var-file=tfvars/$TFVARS_FILENAME + kubectl get all -A + elif [ "$ACTION" = "destroy" ]; then + echo "Destroying $COMPONENT" + terraform destroy --auto-approve --var-file=tfvars/$TFVARS_FILENAME + fi +fi
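
# A minimal usage sketch for the two new utils scripts above, assuming an already deployed SPS venue.
# The hostnames, the "admin" username and the environment variables below are illustrative placeholders,
# not values taken from this change set; only the script arguments and the sbg_preprocess_cwl_dag route
# reflect the code in this diff.

# bring the stack up in the required order: EKS, then Karpenter, then Airflow
./utils/unity_sps_deploy_or_destroy_sps.sh deploy eks
./utils/unity_sps_deploy_or_destroy_sps.sh deploy karpenter
./utils/unity_sps_deploy_or_destroy_sps.sh deploy airflow

# register the DAGs as OGC processes with the WPS-T API exposed by the deployment
./utils/post_deployment.sh http://<ogc-processes-api-host>:5001

# optionally trigger the renamed preprocess DAG through the Airflow REST API
curl -X POST -u "admin:${AIRFLOW_WEBSERVER_PASSWORD}" \
     -H "Content-Type: application/json" \
     --data '{"note": "manual trigger"}' \
     "http://<airflow-endpoint>/api/v1/dags/sbg_preprocess_cwl_dag/dagRuns"

# tear everything down in the reverse order
./utils/unity_sps_deploy_or_destroy_sps.sh destroy airflow
./utils/unity_sps_deploy_or_destroy_sps.sh destroy karpenter
./utils/unity_sps_deploy_or_destroy_sps.sh destroy eks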