diff --git a/jobs/tests/test_utils.py b/jobs/tests/test_utils.py index f485cfdeb..b112ee167 100644 --- a/jobs/tests/test_utils.py +++ b/jobs/tests/test_utils.py @@ -672,154 +672,138 @@ def files_data_from_FManager(): def test_convert_files_data_for_inference_without_output_bucket( - files_data_from_FManager, + files_data_from_FManager, ): expected_result = [ - { - "file": "files/1/1.pdf", - "bucket": "bucket11", - "pages": [1, 2, 3, 4, 5], - "file_id": 1, - "output_path": "runs/11/1/1", - "datasets": ["dataset11"], - "output_bucket": "another_bucket" - }, - { - "file": "files/1/1.pdf", - "bucket": "bucket11", - "pages": [6, 7, 8, 9, 10], - "file_id": 1, - "output_path": "runs/11/1/2", - "datasets": ["dataset11"], - "output_bucket": "another_bucket" - }, - { - "file": "files/2/2.pdf", - "bucket": "bucket11", - "pages": [1, 2], - "file_id": 2, - "output_path": "runs/11/2", - "datasets": ["dataset11"], - "output_bucket": "another_bucket" - }, - { - "file": "files/3/3.pdf", - "bucket": "bucket11", - "pages": [1, 2, 3, 4, 5], - "file_id": 3, - "output_path": "runs/11/3/1", - "datasets": ["dataset22"], - "output_bucket": "another_bucket" - }, - { - "file": "files/3/3.pdf", - "bucket": "bucket11", - "pages": [6], - "file_id": 3, - "output_path": "runs/11/3/2", - "datasets": ["dataset22"], - "output_bucket": "another_bucket" - }, - { - "file": "files/4/4.pdf", - "bucket": "bucket11", - "pages": [1, 2, 3, 4, 5], - "file_id": 4, - "output_path": "runs/11/4/1", - "datasets": ["dataset22"], - "output_bucket": "another_bucket" - }, - { - "file": "files/4/4.pdf", - "bucket": "bucket11", - "pages": [6, 7, 8, 9, 10], - "file_id": 4, - "output_path": "runs/11/4/2", - "datasets": ["dataset22"], - "output_bucket": "another_bucket" - }, - { - "file": "files/4/4.pdf", - "bucket": "bucket11", - "pages": [11, 12, 13, 14, 15], - "file_id": 4, - "output_path": "runs/11/4/3", - "datasets": ["dataset22"], - "output_bucket": "another_bucket" - }, - { - "file": "files/4/4.pdf", - "bucket": "bucket11", - "pages": [16, 17, 18, 19, 20], - "file_id": 4, - "output_path": "runs/11/4/4", - "datasets": ["dataset22"], - "output_bucket": "another_bucket" - }, - { - "file": "files/4/4.pdf", - "bucket": "bucket11", - "pages": [21, 22, 23, 24, 25], - "file_id": 4, - "output_path": "runs/11/4/5", - "datasets": ["dataset22"], - "output_bucket": "another_bucket" - }, - { - "file": "files/4/4.pdf", - "bucket": "bucket11", - "pages": [26, 27, 28, 29, 30], - "file_id": 4, - "output_path": "runs/11/4/6", - "datasets": ["dataset22"], - "output_bucket": "another_bucket" - }, - { - "file": "files/4/4.pdf", - "bucket": "bucket11", - "pages": [31, 32], - "file_id": 4, - "output_path": "runs/11/4/7", - "datasets": ["dataset22"], - "output_bucket": "another_bucket" - }, - { - "file": "files/5/5.pdf", - "bucket": "bucket11", - "pages": [1, 2, 3, 4, 5], - "file_id": 5, - "output_path": "runs/11/5/1", - "datasets": ["dataset22"], - "output_bucket": "another_bucket" - }, - { - "file": "files/5/5.pdf", - "bucket": "bucket11", - "pages": [6, 7, 8, 9, 10], - "file_id": 5, - "output_path": "runs/11/5/2", - "datasets": ["dataset22"], - "output_bucket": "another_bucket" - }, - { - "file": "files/5/5.pdf", - "bucket": "bucket11", - "pages": [11, 12, 13, 14, 15], - "file_id": 5, - "output_path": "runs/11/5/3", - "datasets": ["dataset22"], - "output_bucket": "another_bucket" - }, - { - "file": "files/5/5.pdf", - "bucket": "bucket11", - "pages": [16, 17, 18, 19, 20], - "file_id": 5, - "output_path": "runs/11/5/4", - "datasets": ["dataset22"], - "output_bucket": "another_bucket" - } - ] + { + "file": "files/1/1.pdf", + "bucket": "bucket11", + "pages": [1, 2, 3, 4, 5], + "file_id": 1, + "output_path": "runs/11/1/1", + "datasets": ["dataset11"], + }, + { + "file": "files/1/1.pdf", + "bucket": "bucket11", + "pages": [6, 7, 8, 9, 10], + "file_id": 1, + "output_path": "runs/11/1/2", + "datasets": ["dataset11"], + }, + { + "file": "files/2/2.pdf", + "bucket": "bucket11", + "pages": [1, 2], + "file_id": 2, + "output_path": "runs/11/2", + "datasets": ["dataset11"], + }, + { + "file": "files/3/3.pdf", + "bucket": "bucket11", + "pages": [1, 2, 3, 4, 5], + "file_id": 3, + "output_path": "runs/11/3/1", + "datasets": ["dataset22"], + }, + { + "file": "files/3/3.pdf", + "bucket": "bucket11", + "pages": [6], + "file_id": 3, + "output_path": "runs/11/3/2", + "datasets": ["dataset22"], + }, + { + "file": "files/4/4.pdf", + "bucket": "bucket11", + "pages": [1, 2, 3, 4, 5], + "file_id": 4, + "output_path": "runs/11/4/1", + "datasets": ["dataset22"], + }, + { + "file": "files/4/4.pdf", + "bucket": "bucket11", + "pages": [6, 7, 8, 9, 10], + "file_id": 4, + "output_path": "runs/11/4/2", + "datasets": ["dataset22"], + }, + { + "file": "files/4/4.pdf", + "bucket": "bucket11", + "pages": [11, 12, 13, 14, 15], + "file_id": 4, + "output_path": "runs/11/4/3", + "datasets": ["dataset22"], + }, + { + "file": "files/4/4.pdf", + "bucket": "bucket11", + "pages": [16, 17, 18, 19, 20], + "file_id": 4, + "output_path": "runs/11/4/4", + "datasets": ["dataset22"], + }, + { + "file": "files/4/4.pdf", + "bucket": "bucket11", + "pages": [21, 22, 23, 24, 25], + "file_id": 4, + "output_path": "runs/11/4/5", + "datasets": ["dataset22"], + }, + { + "file": "files/4/4.pdf", + "bucket": "bucket11", + "pages": [26, 27, 28, 29, 30], + "file_id": 4, + "output_path": "runs/11/4/6", + "datasets": ["dataset22"], + }, + { + "file": "files/4/4.pdf", + "bucket": "bucket11", + "pages": [31, 32], + "file_id": 4, + "output_path": "runs/11/4/7", + "datasets": ["dataset22"], + }, + { + "file": "files/5/5.pdf", + "bucket": "bucket11", + "pages": [1, 2, 3, 4, 5], + "file_id": 5, + "output_path": "runs/11/5/1", + "datasets": ["dataset22"], + }, + { + "file": "files/5/5.pdf", + "bucket": "bucket11", + "pages": [6, 7, 8, 9, 10], + "file_id": 5, + "output_path": "runs/11/5/2", + "datasets": ["dataset22"], + }, + { + "file": "files/5/5.pdf", + "bucket": "bucket11", + "pages": [11, 12, 13, 14, 15], + "file_id": 5, + "output_path": "runs/11/5/3", + "datasets": ["dataset22"], + }, + { + "file": "files/5/5.pdf", + "bucket": "bucket11", + "pages": [16, 17, 18, 19, 20], + "file_id": 5, + "output_path": "runs/11/5/4", + "datasets": ["dataset22"], + } +] assert ( utils.convert_files_data_for_inference( @@ -840,7 +824,7 @@ def test_convert_files_data_for_inference_with_completley_another_output_bucket( "file_id": 1, "output_path": "runs/11/1/1", "datasets": ["dataset11"], - "output_bucket": "another_bucket" + "output_bucket": "another_bucket", }, { "file": "files/1/1.pdf", @@ -849,7 +833,7 @@ def test_convert_files_data_for_inference_with_completley_another_output_bucket( "file_id": 1, "output_path": "runs/11/1/2", "datasets": ["dataset11"], - "output_bucket": "another_bucket" + "output_bucket": "another_bucket", }, { "file": "files/2/2.pdf", @@ -858,7 +842,7 @@ def test_convert_files_data_for_inference_with_completley_another_output_bucket( "file_id": 2, "output_path": "runs/11/2", "datasets": ["dataset11"], - "output_bucket": "another_bucket" + "output_bucket": "another_bucket", }, { "file": "files/3/3.pdf", @@ -867,7 +851,7 @@ def test_convert_files_data_for_inference_with_completley_another_output_bucket( "file_id": 3, "output_path": "runs/11/3/1", "datasets": ["dataset22"], - "output_bucket": "another_bucket" + "output_bucket": "another_bucket", }, { "file": "files/3/3.pdf", @@ -876,7 +860,7 @@ def test_convert_files_data_for_inference_with_completley_another_output_bucket( "file_id": 3, "output_path": "runs/11/3/2", "datasets": ["dataset22"], - "output_bucket": "another_bucket" + "output_bucket": "another_bucket", }, { "file": "files/4/4.pdf", @@ -885,7 +869,7 @@ def test_convert_files_data_for_inference_with_completley_another_output_bucket( "file_id": 4, "output_path": "runs/11/4/1", "datasets": ["dataset22"], - "output_bucket": "another_bucket" + "output_bucket": "another_bucket", }, { "file": "files/4/4.pdf", @@ -894,7 +878,7 @@ def test_convert_files_data_for_inference_with_completley_another_output_bucket( "file_id": 4, "output_path": "runs/11/4/2", "datasets": ["dataset22"], - "output_bucket": "another_bucket" + "output_bucket": "another_bucket", }, { "file": "files/4/4.pdf", @@ -903,7 +887,7 @@ def test_convert_files_data_for_inference_with_completley_another_output_bucket( "file_id": 4, "output_path": "runs/11/4/3", "datasets": ["dataset22"], - "output_bucket": "another_bucket" + "output_bucket": "another_bucket", }, { "file": "files/4/4.pdf", @@ -912,7 +896,7 @@ def test_convert_files_data_for_inference_with_completley_another_output_bucket( "file_id": 4, "output_path": "runs/11/4/4", "datasets": ["dataset22"], - "output_bucket": "another_bucket" + "output_bucket": "another_bucket", }, { "file": "files/4/4.pdf", @@ -921,7 +905,7 @@ def test_convert_files_data_for_inference_with_completley_another_output_bucket( "file_id": 4, "output_path": "runs/11/4/5", "datasets": ["dataset22"], - "output_bucket": "another_bucket" + "output_bucket": "another_bucket", }, { "file": "files/4/4.pdf", @@ -930,7 +914,7 @@ def test_convert_files_data_for_inference_with_completley_another_output_bucket( "file_id": 4, "output_path": "runs/11/4/6", "datasets": ["dataset22"], - "output_bucket": "another_bucket" + "output_bucket": "another_bucket", }, { "file": "files/4/4.pdf", @@ -939,7 +923,7 @@ def test_convert_files_data_for_inference_with_completley_another_output_bucket( "file_id": 4, "output_path": "runs/11/4/7", "datasets": ["dataset22"], - "output_bucket": "another_bucket" + "output_bucket": "another_bucket", }, { "file": "files/5/5.pdf", @@ -948,7 +932,7 @@ def test_convert_files_data_for_inference_with_completley_another_output_bucket( "file_id": 5, "output_path": "runs/11/5/1", "datasets": ["dataset22"], - "output_bucket": "another_bucket" + "output_bucket": "another_bucket", }, { "file": "files/5/5.pdf", @@ -957,7 +941,7 @@ def test_convert_files_data_for_inference_with_completley_another_output_bucket( "file_id": 5, "output_path": "runs/11/5/2", "datasets": ["dataset22"], - "output_bucket": "another_bucket" + "output_bucket": "another_bucket", }, { "file": "files/5/5.pdf", @@ -966,7 +950,7 @@ def test_convert_files_data_for_inference_with_completley_another_output_bucket( "file_id": 5, "output_path": "runs/11/5/3", "datasets": ["dataset22"], - "output_bucket": "another_bucket" + "output_bucket": "another_bucket", }, { "file": "files/5/5.pdf", @@ -975,7 +959,7 @@ def test_convert_files_data_for_inference_with_completley_another_output_bucket( "file_id": 5, "output_path": "runs/11/5/4", "datasets": ["dataset22"], - "output_bucket": "another_bucket" + "output_bucket": "another_bucket", } ] assert ( @@ -1176,7 +1160,7 @@ def test_delete_duplicates_4(): @pytest.mark.asyncio async def test_execute_external_pipeline(sign_s3_links: bool): with patch( - "jobs.utils.airflow_utils.AirflowPipeline", new=FakePipeline + "jobs.utils.airflow_utils.AirflowPipeline", new=FakePipeline ), patch("jobs.utils.JOBS_SIGNED_URL_ENABLED", new=sign_s3_links), patch( "jobs.utils.create_pre_signed_s3_url", new=patched_create_pre_signed_s3_url,