Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Asyncio Listing and Inventory Report Integration #573

Merged
merged 18 commits into from
Aug 18, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 44 additions & 1 deletion gcsfs/inventory_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,50 @@ class (see 'core.py').
Raises:
ValueError: If the fetched inventory reports are empty.
"""
pass
# There might be multiple inventory reports in the bucket.
inventory_report_metadata = []

# Extract out bucket and destination path of the inventory reports.
bucket = inventory_report_config.bucket
destination_path = inventory_report_config.destination_path

# Fetch the first page.
page = await gcs_file_system._call(
"GET",
"b/{}/o",
bucket,
prefix=destination_path,
json_out=True
)

inventory_report_metadata.extend(page.get("items", []))
next_page_token = page.get("nextPageToken", None)

# Keep fetching pages for as long as a next page token is returned.
# In practice this loop should rarely iterate. For reference, a million
# objects are split across two reports, so a report generated daily
# produces roughly 700 report files in a year, which are still
# fetched in a single page.
while next_page_token is not None:
page = await gcs_file_system._call(
"GET",
"b/{}/o",
bucket,
prefix=destination_path,
json_out=True,
pageToken=next_page_token,
)

inventory_report_metadata.extend(page.get("items", []))
next_page_token = page.get("nextPageToken", None)

# If no reports are fetched, indicates there is an error.
if len(inventory_report_metadata) == 0:
raise ValueError("No inventory reports to fetch. Check if \
your inventory report is set up correctly.")

return inventory_report_metadata

def _sort_inventory_report_metadata(unsorted_inventory_report_metadata):
"""
Expand Down
57 changes: 57 additions & 0 deletions gcsfs/tests/test_inventory_report.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import pytest
import asyncio
from datetime import datetime, timedelta

from gcsfs.core import GCSFileSystem
from gcsfs.inventory_report import InventoryReport, InventoryReportConfig

class TestInventoryReport(object):
Expand Down Expand Up @@ -184,6 +186,61 @@ def test_parse_raw_inventory_report_config_returns_correct_config(self):
except Exception as e:
pytest.fail(f"Unexpected exception: {e}.")

@pytest.mark.asyncio
async def test_fetch_inventory_report_metadata_no_reports(self, mocker):
    """Fetching metadata must raise ``ValueError`` when the listing is empty."""

    # Create a mock for GCSFileSystem.
    gcs_file_system = mocker.MagicMock(spec=GCSFileSystem)

    # Mock the _call method to return a single page that contains
    # no items and no next page token.
    gcs_file_system._call.side_effect = [{"items": [], "nextPageToken": None}]

    # Create a mock for InventoryReportConfig.
    inventory_report_config = mocker.MagicMock(spec=InventoryReportConfig)
    inventory_report_config.bucket = "bucket_name"
    inventory_report_config.destination_path = "destination_path"

    # If no inventory report metadata is fetched, an exception should be
    # raised. The message is checked through ``match`` because statements
    # placed after the raising call inside the ``with`` block never run.
    # Only the leading sentence is matched: the raised message is built with
    # a backslash continuation inside the string literal, so the whitespace
    # in the remainder depends on the source indentation.
    with pytest.raises(ValueError, match="No inventory reports to fetch"):
        await InventoryReport._fetch_inventory_report_metadata(
            gcs_file_system=gcs_file_system,
            inventory_report_config=inventory_report_config,
        )

@pytest.mark.asyncio
async def test_fetch_inventory_report_metadata_multiple_calls(self, mocker):
    """Paginated listings are followed and their items merged in order."""

    # Stand-in for the file system whose _call method is driven below.
    fs_mock = mocker.MagicMock(spec=GCSFileSystem)

    # First response carries two items and a continuation token; the
    # second carries one item and no token, which ends the pagination.
    first_page = {"items": ["item1", "item2"], "nextPageToken": "token1"}
    last_page = {"items": ["item3"], "nextPageToken": None}
    fs_mock._call.side_effect = [first_page, last_page]

    # Stand-in for the report configuration read by the fetcher.
    config_mock = mocker.MagicMock(spec=InventoryReportConfig)
    config_mock.bucket = "bucket_name"
    config_mock.destination_path = "destination_path"

    fetched = await InventoryReport._fetch_inventory_report_metadata(
        gcs_file_system=fs_mock,
        inventory_report_config=config_mock,
    )

    # Both requests must list the same bucket and prefix; only the
    # follow-up request carries the page token.
    list_args = ("GET", "b/{}/o", "bucket_name")
    expected_calls = [
        mocker.call(*list_args, prefix="destination_path", json_out=True),
        mocker.call(
            *list_args,
            prefix="destination_path",
            pageToken="token1",
            json_out=True,
        ),
    ]
    fs_mock._call.assert_has_calls(expected_calls)

    # Items from every page are returned as one flat list, in page order.
    assert fetched == ["item1", "item2", "item3"]



# Test that the fields of the inventory report config are correctly stored.
class TestInventoryReportConfig:
Expand Down