Skip to content

Commit

Permalink
Implemented "ghost" files skipping
Browse files Browse the repository at this point in the history
  • Loading branch information
VOvchinnikov committed May 22, 2024
1 parent d371e21 commit 59d1710
Showing 1 changed file with 24 additions and 5 deletions.
29 changes: 24 additions & 5 deletions gcsfs/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -486,6 +486,25 @@ async def _make_bucket_requester_pays(self, path, state=True):

make_bucket_requester_pays = sync_wrapper(_make_bucket_requester_pays)

@staticmethod
def _filter_ghost_items(items):
    """Return a new list holding ``items`` minus any "ghost" entries.

    An entry counts as a ghost only when all three of these hold:
    its ``kind`` is ``"storage#object"``, its ``size`` is the string
    ``"0"`` and its ``crc32c`` is ``"AAAAAA=="`` (base64 for four zero
    bytes).  Entries lacking any of those values are kept unchanged.

    A falsy ``items`` argument (for example ``None``) yields ``[]``.
    """
    # (field, value) pairs that together identify a ghost entry; they are
    # checked in this order and checking stops at the first mismatch.
    ghost_markers = (
        ("kind", "storage#object"),
        ("size", "0"),
        ("crc32c", "AAAAAA=="),
    )

    def looks_like_ghost(entry):
        return all(
            entry.get(field, "") == marker for field, marker in ghost_markers
        )

    return [entry for entry in (items or []) if not looks_like_ghost(entry)]

async def _get_object(self, path):
"""Return object information at the given path."""
bucket, key, generation = self.split_path(path)
Expand Down Expand Up @@ -528,7 +547,7 @@ async def _get_object(self, path):
maxResults=1 if not generation else None,
versions="true" if generation else None,
)
for item in resp.get("items", []):
for item in self._filter_ghost_items(resp.get("items", [])):
if item["name"] == key and (
not generation or item.get("generation") == generation
):
Expand Down Expand Up @@ -746,7 +765,7 @@ async def _sequential_list_objects_helper(
)

prefixes.extend(page.get("prefixes", []))
items.extend(page.get("items", []))
items.extend(self._filter_ghost_items(page.get("items", [])))
next_page_token = page.get("nextPageToken", None)

while next_page_token is not None:
Expand All @@ -766,7 +785,7 @@ async def _sequential_list_objects_helper(

assert page["kind"] == "storage#objects"
prefixes.extend(page.get("prefixes", []))
items.extend(page.get("items", []))
items.extend(self._filter_ghost_items(page.get("items", [])))
next_page_token = page.get("nextPageToken", None)

items = [self._process_object(bucket, i) for i in items]
Expand All @@ -780,7 +799,7 @@ async def _list_buckets(self):
page = await self._call("GET", "b", project=self.project, json_out=True)

assert page["kind"] == "storage#buckets"
items.extend(page.get("items", []))
items.extend(self._filter_ghost_items(page.get("items", [])))
next_page_token = page.get("nextPageToken", None)

while next_page_token is not None:
Expand All @@ -793,7 +812,7 @@ async def _list_buckets(self):
)

assert page["kind"] == "storage#buckets"
items.extend(page.get("items", []))
items.extend(self._filter_ghost_items(page.get("items", [])))
next_page_token = page.get("nextPageToken", None)

buckets = [
Expand Down

0 comments on commit 59d1710

Please sign in to comment.