From f3c1eb07c59116a1c5a1d5b0d0f11557d8d3053c Mon Sep 17 00:00:00 2001
From: Hans
Date: Mon, 11 Sep 2023 17:27:52 -0700
Subject: [PATCH] Inventory report: fix nit and typo (#578)

Co-authored-by: Hans Easton
---
 gcsfs/core.py                                | 12 +++---------
 gcsfs/inventory_report.py                    | 11 ++---------
 gcsfs/tests/test_inventory_report.py         | 12 ------------
 gcsfs/tests/test_inventory_report_listing.py |  1 -
 4 files changed, 5 insertions(+), 31 deletions(-)

diff --git a/gcsfs/core.py b/gcsfs/core.py
index 97ecb697..f33f6e8c 100644
--- a/gcsfs/core.py
+++ b/gcsfs/core.py
@@ -539,7 +539,6 @@ async def _get_object(self, path):
         return self._process_object(bucket, res)
 
     async def _list_objects(self, path, prefix="", versions=False, **kwargs):
-
         bucket, key, generation = self.split_path(path)
 
         path = path.rstrip("/")
@@ -583,10 +582,8 @@ async def _list_objects(self, path, prefix="", versions=False, **kwargs):
                 return []
 
         out = pseudodirs + items
-        use_snapshot_listing = (
-            False
-            if not inventory_report_info
-            else inventory_report_info.get("use_snapshot_listing")
+        use_snapshot_listing = inventory_report_info and inventory_report_info.get(
+            "use_snapshot_listing"
         )
 
         # Don't cache prefixed/partial listings, in addition to
@@ -598,7 +595,6 @@ async def _list_objects(self, path, prefix="", versions=False, **kwargs):
     async def _do_list_objects(
         self, path, max_results=None, delimiter="/", prefix="", versions=False, **kwargs
     ):
-
         """Object listing for the given {bucket}/{prefix}/ path."""
         bucket, _path, generation = self.split_path(path)
         _path = "" if not _path else _path.rstrip("/") + "/"
@@ -659,8 +655,7 @@ async def _concurrent_list_objects_helper(
         """
 
         # Extract out the names of the objects fetched from the inventory report.
-        snapshot_object_names = [item["name"] for item in items]
-        snapshot_object_names = sorted(snapshot_object_names)
+        snapshot_object_names = sorted([item["name"] for item in items])
 
         # Determine the number of coroutines needed to concurrent listing.
         # Ideally, want each coroutine to fetch a single page of objects.
@@ -755,7 +750,6 @@ async def _sequential_list_objects_helper(
         next_page_token = page.get("nextPageToken", None)
 
         while next_page_token is not None:
-
             page = await self._call(
                 "GET",
                 "b/{}/o",
diff --git a/gcsfs/inventory_report.py b/gcsfs/inventory_report.py
index b16aa4ca..869a6fe0 100644
--- a/gcsfs/inventory_report.py
+++ b/gcsfs/inventory_report.py
@@ -106,14 +106,14 @@ class (see 'core.py').
             use_snapshot_listing=use_snapshot_listing,
         )
 
-        # Use the config to fetch all inventory report medadata.
+        # Use the config to fetch all inventory report metadata.
         unsorted_inventory_report_metadata = await cls._fetch_inventory_report_metadata(
            gcs_file_system=gcs_file_system,
            inventory_report_config=inventory_report_config,
        )
 
         # Sort the metadata based on reverse created time order.
-        inventory_report_metadata = cls._sort_inventory_report_medatada(
+        inventory_report_metadata = cls._sort_inventory_report_metadata(
             unsorted_inventory_report_metadata=unsorted_inventory_report_metadata
         )
 
@@ -397,13 +397,11 @@ class (see 'core.py').
         # will be many inventory reports on the same day. But including this
         # logic for robustness.
         for metadata in inventory_report_metadata:
-
             inventory_report_date = InventoryReport._convert_str_to_datetime(
                 metadata["timeCreated"]
             ).date()
 
             if inventory_report_date == most_recent_date:
-
                 # Download the raw inventory report if the date matches.
                 # Header is not needed, we only need to process and store
                 # the content.
@@ -454,7 +452,6 @@ class (see 'core.py').
         objects = []
 
         for content in inventory_report_content:
-
             # Split the content into lines based on the specified separator.
             lines = content.split(record_separator)
 
@@ -464,7 +461,6 @@ class (see 'core.py').
 
             # Parse each line of the inventory report.
             for line in lines:
-
                 obj = InventoryReport._parse_inventory_report_line(
                     inventory_report_line=line,
                     use_snapshot_listing=use_snapshot_listing,
@@ -554,18 +550,15 @@ def _construct_final_snapshot(objects, prefix, use_snapshot_listing):
         # Filter the prefix and returns the list if the user does not want to use
         # the snapshot for listing.
         if use_snapshot_listing is False:
-
             return [obj for obj in objects if obj.get("name").startswith(prefix)], []
 
         else:
-
             # If the user wants to use the snapshot, generate both the items and
             # prefixes manually.
             items = []
             prefixes = set()
 
             for obj in objects:
-
                 # Fetch the name of the object.
                 obj_name = obj.get("name")
 
diff --git a/gcsfs/tests/test_inventory_report.py b/gcsfs/tests/test_inventory_report.py
index c01e3d69..36e42270 100644
--- a/gcsfs/tests/test_inventory_report.py
+++ b/gcsfs/tests/test_inventory_report.py
@@ -66,7 +66,6 @@ def test_validate_inventory_report_info(
     async def test_fetch_raw_inventory_report_config(
         self, location, id, exception, expected_result
     ):
-
         # Mocking the gcs_file_system.
         gcs_file_system = mock.MagicMock()
         gcs_file_system.project = "project"
@@ -95,7 +94,6 @@ async def test_fetch_raw_inventory_report_config(
         assert result == expected_result
 
     def test_parse_raw_inventory_report_config_invalid_date(self):
-
         today = datetime.today().date()
 
         # Get tomorrow's date.
@@ -130,7 +128,6 @@ def test_parse_raw_inventory_report_config_invalid_date(self):
             )
 
     def test_parse_raw_inventory_report_config_missing_metadata_fields(self):
-
         raw_inventory_report_config = {
             "frequencyOptions": mock.MagicMock(),
             "objectMetadataReportOptions": {
@@ -149,7 +146,6 @@ def test_parse_raw_inventory_report_config_missing_metadata_fields(self):
             )
 
     def test_parse_raw_inventory_report_config_returns_correct_config(self):
-
         bucket = "bucket"
         destination_path = "path/to/inventory-report"
         metadata_fields = ["project", "bucket", "name", "size"]
@@ -209,7 +205,6 @@ def test_parse_raw_inventory_report_config_returns_correct_config(self):
 
     @pytest.mark.asyncio
     async def test_fetch_inventory_report_metadata_no_reports(self):
-
         # Create a mock for GCSFileSystem.
         gcs_file_system = mock.MagicMock(spec=GCSFileSystem)
 
@@ -233,7 +228,6 @@ async def test_fetch_inventory_report_metadata_no_reports(self):
 
     @pytest.mark.asyncio
     async def test_fetch_inventory_report_metadata_multiple_calls(self):
-
         # Create a mock for GCSFileSystem.
         gcs_file_system = mock.MagicMock(spec=GCSFileSystem)
 
@@ -367,7 +361,6 @@ def download_inventory_report_content_setup(self, request):
     async def test_download_inventory_report_content(
         self, download_inventory_report_content_setup
     ):
-
         (
             gcs_file_system,
             inventory_report_metadata,
@@ -418,7 +411,6 @@ def test_parse_inventory_report_line(
         bucket,
         expected,
     ):
-
         # Mock InventoryReportConfig.
         inventory_report_config = mock.MagicMock(spec=InventoryReportConfig)
         inventory_report_config.obj_name_idx = inventory_report_config_attrs.get(
@@ -482,7 +474,6 @@ def test_parse_inventory_report_line(
         ]
     )
     def parse_inventory_report_content_setup(self, request):
-
         # Mock the necessary parameters.
         gcs_file_system = mock.MagicMock()
         bucket = mock.MagicMock()
@@ -515,7 +506,6 @@ def parse_inventory_report_content_setup(self, request):
         )
 
     def test_parse_inventory_reports(self, parse_inventory_report_content_setup):
-
         (
             gcs_file_system,
             inventory_report_content,
@@ -723,7 +713,6 @@ def test_parse_inventory_reports(self, parse_inventory_report_content_setup):
     def test_construct_final_snapshot(
         self, use_snapshot_listing, prefix, mock_objects, expected_result
     ):
-
         # Construct the final snapshot.
         result = InventoryReport._construct_final_snapshot(
             objects=mock_objects,
@@ -741,7 +730,6 @@ def test_construct_final_snapshot(
 # Test fields of the inventory report config is correctly stored.
 class TestInventoryReportConfig:
     def test_inventory_report_config_creation(self):
-
         csv_options = {}
         bucket = "bucket"
         destination_path = ""
diff --git a/gcsfs/tests/test_inventory_report_listing.py b/gcsfs/tests/test_inventory_report_listing.py
index abbc008b..584485a7 100644
--- a/gcsfs/tests/test_inventory_report_listing.py
+++ b/gcsfs/tests/test_inventory_report_listing.py
@@ -7,7 +7,6 @@
 
 # Basic integration test to ensure listing returns the correct result.
 def test_ls_base(monkeypatch, gcs):
-
     # First get results from original listing.
     items = gcs.ls(TEST_BUCKET)
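
For reference, a minimal sketch of how the simplified use_snapshot_listing expression in the gcsfs/core.py hunk above evaluates. The helper function and the sample dictionaries below are illustrative assumptions for this note only; they are not code from gcsfs.

def resolve_use_snapshot_listing(inventory_report_info):
    # Mirrors the expression introduced in the core.py hunk: a falsy
    # inventory_report_info (e.g. None) short-circuits the `and` and is
    # returned unchanged, which is still falsy; otherwise the value stored
    # under "use_snapshot_listing" is returned.
    return inventory_report_info and inventory_report_info.get("use_snapshot_listing")

print(bool(resolve_use_snapshot_listing(None)))                             # False
print(bool(resolve_use_snapshot_listing({"use_snapshot_listing": True})))   # True
print(bool(resolve_use_snapshot_listing({"use_snapshot_listing": False})))  # False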