Skip to content

Commit

Permalink
adlfs: don't append / to dirs in find() and fix glob()
Browse files Browse the repository at this point in the history
This makes find() consistent with localfs/gcsfs/s3fs and also makes glob() work
after the changes in fsspec/filesystem_spec#1382.
  • Loading branch information
efiop committed Dec 22, 2023
1 parent ff919cf commit 616d708
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 40 deletions.
9 changes: 6 additions & 3 deletions adlfs/spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -931,14 +931,15 @@ async def _find(self, path, withdirs=False, prefix="", with_parent=False, **kwar
for info in infos:
name = _name = info["name"]
while True:
parent_dir = self._parent(_name).rstrip("/") + "/"
stripped_parent_dir = self._parent(_name).rstrip("/")
parent_dir = stripped_parent_dir + "/"
if (
parent_dir not in dir_set
and parent_dir != full_path.strip("/") + "/"
):
dir_set.add(parent_dir)
dirs[parent_dir] = {
"name": parent_dir,
dirs[stripped_parent_dir] = {
"name": stripped_parent_dir,
"type": "directory",
"size": 0,
}
Expand All @@ -947,6 +948,8 @@ async def _find(self, path, withdirs=False, prefix="", with_parent=False, **kwar
break

if info["type"] == "directory":
name = name.rstrip("/")
info["name"] = info["name"].rstrip("/")
dirs[name] = info
if info["type"] == "file":
files[name] = info
Expand Down
72 changes: 36 additions & 36 deletions adlfs/tests/test_spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -450,35 +450,35 @@ def test_find(storage):

# all files and directories
assert fs.find("data/root", withdirs=True) == [
"data/root/a/",
"data/root/a",
"data/root/a/file.txt",
"data/root/a1/",
"data/root/a1",
"data/root/a1/file1.txt",
"data/root/b/",
"data/root/b",
"data/root/b/file.txt",
"data/root/c/",
"data/root/c",
"data/root/c/file1.txt",
"data/root/c/file2.txt",
"data/root/d/",
"data/root/d",
"data/root/d/file_with_metadata.txt",
"data/root/e+f/",
"data/root/e+f",
"data/root/e+f/file1.txt",
"data/root/e+f/file2.txt",
"data/root/rfile.txt",
]
assert fs.find("data/root/", withdirs=True) == [
"data/root/a/",
"data/root/a",
"data/root/a/file.txt",
"data/root/a1/",
"data/root/a1",
"data/root/a1/file1.txt",
"data/root/b/",
"data/root/b",
"data/root/b/file.txt",
"data/root/c/",
"data/root/c",
"data/root/c/file1.txt",
"data/root/c/file2.txt",
"data/root/d/",
"data/root/d",
"data/root/d/file_with_metadata.txt",
"data/root/e+f/",
"data/root/e+f",
"data/root/e+f/file1.txt",
"data/root/e+f/file2.txt",
"data/root/rfile.txt",
Expand All @@ -494,17 +494,17 @@ def test_find(storage):
]

assert fs.find("data/root", prefix="a", withdirs=True) == [
"data/root/a/",
"data/root/a",
"data/root/a/file.txt",
"data/root/a1/",
"data/root/a1",
"data/root/a1/file1.txt",
]

find_results = fs.find("data/root", prefix="a1", withdirs=True, detail=True)
assert_blobs_equals(
list(find_results.values()),
[
{"name": "data/root/a1/", "size": 0, "type": "directory"},
{"name": "data/root/a1", "size": 0, "type": "directory"},
{
"name": "data/root/a1/file1.txt",
"size": 10,
Expand Down Expand Up @@ -551,12 +551,12 @@ def test_glob(storage):

# top-level contents of a directory
assert fs.glob("data/root/*") == [
"data/root/a/",
"data/root/a1/",
"data/root/b/",
"data/root/c/",
"data/root/d/",
"data/root/e+f/",
"data/root/a",
"data/root/a1",
"data/root/b",
"data/root/c",
"data/root/d",
"data/root/e+f",
"data/root/rfile.txt",
]

Expand Down Expand Up @@ -606,36 +606,36 @@ def test_glob(storage):

# all files
assert fs.glob("data/root/**") == [
"data/root/a/",
"data/root/a",
"data/root/a/file.txt",
"data/root/a1/",
"data/root/a1",
"data/root/a1/file1.txt",
"data/root/b/",
"data/root/b",
"data/root/b/file.txt",
"data/root/c/",
"data/root/c",
"data/root/c/file1.txt",
"data/root/c/file2.txt",
"data/root/d/",
"data/root/d",
"data/root/d/file_with_metadata.txt",
"data/root/e+f/",
"data/root/e+f",
"data/root/e+f/file1.txt",
"data/root/e+f/file2.txt",
"data/root/rfile.txt",
]
assert fs.glob("data/roo**") == [
"data/root/",
"data/root/a/",
assert fs.glob("data/roo*/**") == [
"data/root",
"data/root/a",
"data/root/a/file.txt",
"data/root/a1/",
"data/root/a1",
"data/root/a1/file1.txt",
"data/root/b/",
"data/root/b",
"data/root/b/file.txt",
"data/root/c/",
"data/root/c",
"data/root/c/file1.txt",
"data/root/c/file2.txt",
"data/root/d/",
"data/root/d",
"data/root/d/file_with_metadata.txt",
"data/root/e+f/",
"data/root/e+f",
"data/root/e+f/file1.txt",
"data/root/e+f/file2.txt",
"data/root/rfile.txt",
Expand Down Expand Up @@ -1191,7 +1191,7 @@ def test_dask_parquet(storage):
write_metadata_file=True,
)
assert fs.glob("test/test_group3.parquet/*") == [
"test/test_group3.parquet/A=1/",
"test/test_group3.parquet/A=1",
"test/test_group3.parquet/_common_metadata",
"test/test_group3.parquet/_metadata",
]
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ dependencies = [
"azure-datalake-store>=0.0.46,<0.1",
"azure-identity",
"azure-storage-blob>=12.12.0",
"fsspec>=2023.9.0",
"fsspec>=2023.12.0",
"aiohttp>=3.7.0",
]

Expand Down

0 comments on commit 616d708

Please sign in to comment.