Skip to content

Commit

Permalink
Added a method to filter out "ghost" 0-sized objects GCS started to create
Browse files Browse the repository at this point in the history
  • Loading branch information
VOvchinnikov committed Sep 14, 2023
1 parent 863c93d commit dd2574d
Showing 1 changed file with 18 additions and 5 deletions.
23 changes: 18 additions & 5 deletions gcsfs/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -507,7 +507,7 @@ async def _get_object(self, path):
maxResults=1 if not generation else None,
versions="true" if generation else None,
)
for item in resp.get("items", []):
for item in self._filter_ghost_items(resp.get("items", [])):
if item["name"] == key and (
not generation or item.get("generation") == generation
):
Expand Down Expand Up @@ -559,6 +559,19 @@ async def _list_objects(self, path, prefix="", versions=False):
self.dircache[path] = out
return out

@staticmethod
def _filter_ghost_items(items):
    """Drop "ghost" zero-sized object entries from a listing page.

    An entry is treated as a ghost when its ``kind`` is
    ``"storage#object"`` and its ``size`` equals the string ``"0"``.
    An object entry without a ``size`` key is treated the same way,
    because the lookup defaults to ``"0"``. Entries of any other kind
    (for example bucket entries, which carry no ``size``) are always kept.

    Parameters
    ----------
    items : list of dict or None
        Raw ``items`` entries taken from a listing response. ``None`` or
        an empty list yields an empty result.

    Returns
    -------
    list of dict
        A new list holding the non-ghost entries in their original order.
    """
    if not items:
        return []

    # Keep an entry unless it is BOTH an object AND zero-sized. The two
    # tests must be joined with ``or``: joining them with ``and`` keeps
    # only entries that are neither objects nor zero-sized, which discards
    # every real object and every size-less (bucket) entry.
    return [
        item
        for item in items
        if item.get("kind", "") != "storage#object"
        or item.get("size", "0") != "0"
    ]

async def _do_list_objects(
self, path, max_results=None, delimiter="/", prefix="", versions=False
):
Expand All @@ -581,7 +594,7 @@ async def _do_list_objects(
)

prefixes.extend(page.get("prefixes", []))
items.extend(page.get("items", []))
items.extend(self._filter_ghost_items(page.get("items", [])))
next_page_token = page.get("nextPageToken", None)

while next_page_token is not None:
Expand All @@ -599,7 +612,7 @@ async def _do_list_objects(

assert page["kind"] == "storage#objects"
prefixes.extend(page.get("prefixes", []))
items.extend(page.get("items", []))
items.extend(self._filter_ghost_items(page.get("items", [])))
next_page_token = page.get("nextPageToken", None)

items = [self._process_object(bucket, i) for i in items]
Expand All @@ -612,7 +625,7 @@ async def _list_buckets(self):
page = await self._call("GET", "b", project=self.project, json_out=True)

assert page["kind"] == "storage#buckets"
items.extend(page.get("items", []))
items.extend(self._filter_ghost_items(page.get("items", [])))
next_page_token = page.get("nextPageToken", None)

while next_page_token is not None:
Expand All @@ -625,7 +638,7 @@ async def _list_buckets(self):
)

assert page["kind"] == "storage#buckets"
items.extend(page.get("items", []))
items.extend(self._filter_ghost_items(page.get("items", [])))
next_page_token = page.get("nextPageToken", None)

buckets = [
Expand Down

0 comments on commit dd2574d

Please sign in to comment.