From dd2574d83f8c7c2e8c26c93238a8a0797d3b8c50 Mon Sep 17 00:00:00 2001
From: VOvchinnikov
Date: Thu, 14 Sep 2023 18:20:57 +0200
Subject: [PATCH] Added a method to filter out "ghost" 0-sized objects GCS started to create

---
Review notes: the keep-condition in _filter_ghost_items originally joined
its two tests with "and", which discarded every "storage#object" item and
every item without a "size" key.  It now uses "or", so only objects whose
size is "0" are dropped.  Hunk offsets and the diffstat were recomputed for
the longer helper; the post-image blob id on the index line was not
regenerated.

 gcsfs/core.py | 29 ++++++++++++++++++++++++-----
 1 file changed, 24 insertions(+), 5 deletions(-)

diff --git a/gcsfs/core.py b/gcsfs/core.py
index 390a3b7b..52305541 100644
--- a/gcsfs/core.py
+++ b/gcsfs/core.py
@@ -507,7 +507,7 @@ async def _get_object(self, path):
             maxResults=1 if not generation else None,
             versions="true" if generation else None,
         )
-        for item in resp.get("items", []):
+        for item in self._filter_ghost_items(resp.get("items", [])):
             if item["name"] == key and (
                 not generation or item.get("generation") == generation
             ):
@@ -559,6 +559,25 @@ async def _list_objects(self, path, prefix="", versions=False):
             self.dircache[path] = out
         return out
 
+    @staticmethod
+    def _filter_ghost_items(items):
+        """Drop zero-sized "ghost" objects from one page of listing items.
+
+        An item is removed only when its ``kind`` is ``"storage#object"``
+        and its ``size`` is the string ``"0"``.  Entries of any other kind
+        (such as the buckets passed in by ``_list_buckets``) and objects
+        without a ``size`` field are kept.  ``None`` is treated as an empty
+        list.
+
+        NOTE(review): this also hides genuinely empty objects; confirm
+        that is intended.
+        """
+        return [
+            item
+            for item in items or []
+            if item.get("kind") != "storage#object" or item.get("size") != "0"
+        ]
+
     async def _do_list_objects(
         self, path, max_results=None, delimiter="/", prefix="", versions=False
     ):
@@ -581,7 +600,7 @@ async def _do_list_objects(
         )
 
         prefixes.extend(page.get("prefixes", []))
-        items.extend(page.get("items", []))
+        items.extend(self._filter_ghost_items(page.get("items", [])))
         next_page_token = page.get("nextPageToken", None)
 
         while next_page_token is not None:
@@ -599,7 +618,7 @@ async def _do_list_objects(
 
             assert page["kind"] == "storage#objects"
             prefixes.extend(page.get("prefixes", []))
-            items.extend(page.get("items", []))
+            items.extend(self._filter_ghost_items(page.get("items", [])))
             next_page_token = page.get("nextPageToken", None)
 
         items = [self._process_object(bucket, i) for i in items]
@@ -612,7 +631,7 @@ async def _list_buckets(self):
             page = await self._call("GET", "b", project=self.project, json_out=True)
 
             assert page["kind"] == "storage#buckets"
-            items.extend(page.get("items", []))
+            items.extend(self._filter_ghost_items(page.get("items", [])))
             next_page_token = page.get("nextPageToken", None)
 
             while next_page_token is not None:
@@ -625,7 +644,7 @@ async def _list_buckets(self):
                 )
 
                 assert page["kind"] == "storage#buckets"
-                items.extend(page.get("items", []))
+                items.extend(self._filter_ghost_items(page.get("items", [])))
                 next_page_token = page.get("nextPageToken", None)
 
             buckets = [