From c62ceb4b1ed8f2458c54c99cec900f3ee6dfda8c Mon Sep 17 00:00:00 2001 From: Zolisa Bleki Date: Sun, 1 Sep 2024 00:53:05 +0200 Subject: [PATCH 1/2] Generalize list and list_prefix for FilesystemStore. This also improves the implementation of list_prefix since now the filesystem is walked from the prefix directory instead of from the root. --- zarr-eio/src/storage.ml | 31 +++++++++++-------------------- zarr-lwt/src/storage.ml | 37 +++++++++++++------------------------ zarr-sync/src/storage.ml | 35 ++++++++++------------------------- 3 files changed, 34 insertions(+), 69 deletions(-) diff --git a/zarr-eio/src/storage.ml b/zarr-eio/src/storage.ml index c8a5899..0bfa79b 100644 --- a/zarr-eio/src/storage.ml +++ b/zarr-eio/src/storage.ml @@ -56,14 +56,17 @@ module FilesystemStore = struct let file_offset = Eio.File.seek flow (Optint.Int63.of_int ofs) `Set in Eio.File.pwrite_all flow ~file_offset [Cstruct.of_string ~allocator str] - let list t = - let rec aux acc dir = - List.fold_left - (fun a x -> - match Eio.Path.(dir / x) with - | p when Eio.Path.is_directory p -> aux a p - | p -> (fspath_to_key t p) :: a) acc (Eio.Path.read_dir dir) - in aux [] t.root + let rec walk t acc dir = + List.fold_left + (fun a x -> + match Eio.Path.(dir / x) with + | p when Eio.Path.is_directory p -> walk t a p + | p -> (fspath_to_key t p) :: a) acc (Eio.Path.read_dir dir) + + let list t = walk t [] t.root + + let list_prefix t prefix = + walk t [] (key_to_fspath t prefix) let is_member t key = Eio.Path.is_file @@ key_to_fspath t key @@ -71,18 +74,6 @@ module FilesystemStore = struct let erase t key = Eio.Path.unlink @@ key_to_fspath t key - let list_prefix t prefix = - let rec aux acc dir = - let xs = Eio.Path.read_dir dir in - List.fold_left - (fun a x -> - match Eio.Path.(dir / x) with - | p when Eio.Path.is_directory p -> aux a p - | p -> - let key = fspath_to_key t p in - if String.starts_with ~prefix key then key :: a else a) acc xs - in aux [] t.root - let erase_prefix t pre = (* if prefix points to the root of the store, only delete sub-dirs and files.*) let prefix = key_to_fspath t pre in diff --git a/zarr-lwt/src/storage.ml b/zarr-lwt/src/storage.ml index 9abc750..95539e3 100644 --- a/zarr-lwt/src/storage.ml +++ b/zarr-lwt/src/storage.ml @@ -89,16 +89,19 @@ module FilesystemStore = struct let* () = Lwt_io.set_position oc @@ Int64.of_int ofs in Lwt_io.write oc value) rvs - let list t = - let rec filter_concat acc dir = - Lwt_stream.fold_s - (fun x a -> - if x = "." || x = ".." then Lwt.return a else - match Filename.concat dir x with - | p when Sys.is_directory p -> filter_concat a p - | p -> Lwt.return @@ fspath_to_key t p :: a) - (Lwt_unix.files_of_directory dir) acc - in filter_concat [] @@ key_to_fspath t "" + let rec walk t acc dir = + Lwt_stream.fold_s + (fun x a -> + if x = "." || x = ".." then Lwt.return a else + match Filename.concat dir x with + | p when Sys.is_directory p -> walk t a p + | p -> Lwt.return @@ fspath_to_key t p :: a) + (Lwt_unix.files_of_directory dir) acc + + let list t = walk t [] (key_to_fspath t "") + + let list_prefix t prefix = + walk t [] (key_to_fspath t prefix) let is_member t key = Lwt_unix.file_exists @@ key_to_fspath t key @@ -106,20 +109,6 @@ module FilesystemStore = struct let erase t key = Lwt_unix.unlink @@ key_to_fspath t key - let list_prefix t prefix = - let rec filter_concat acc dir = - Lwt_stream.fold_s - (fun x a -> - if x = "." || x = ".." then Lwt.return a else - match Filename.concat dir x with - | p when Sys.is_directory p -> filter_concat a p - | p -> - let key = fspath_to_key t p in - if String.starts_with ~prefix key - then Lwt.return @@ key :: a else Lwt.return a) - (Lwt_unix.files_of_directory dir) acc - in filter_concat [] @@ key_to_fspath t "" - let erase_prefix t pre = list_prefix t pre >>= Lwt_list.iter_s @@ erase t diff --git a/zarr-sync/src/storage.ml b/zarr-sync/src/storage.ml index 1b887b3..6cdbc0d 100644 --- a/zarr-sync/src/storage.ml +++ b/zarr-sync/src/storage.ml @@ -59,18 +59,6 @@ module FilesystemStore = struct Out_channel.seek oc @@ Int64.of_int rs; Out_channel.output_string oc value) rvs; Out_channel.flush oc) - let list t = - let rec aux acc dir = - match Sys.readdir dir with - | [||] -> acc - | xs -> - List.concat_map - (fun x -> - match Filename.concat dir x with - | p when Sys.is_directory p -> aux acc p - | p -> (fspath_to_key t p) :: acc) @@ Array.to_list xs - in aux [] @@ key_to_fspath t "" - let is_member t key = Sys.file_exists @@ key_to_fspath t key let erase t key = Sys.remove @@ key_to_fspath t key @@ -82,20 +70,17 @@ module FilesystemStore = struct (key_to_fspath t key) (fun ic -> In_channel.length ic |> Int64.to_int) + let rec walk t acc dir = + List.fold_left + (fun a x -> + match Filename.concat dir x with + | p when Sys.is_directory p -> walk t a p + | p -> (fspath_to_key t p) :: a) acc (Array.to_list @@ Sys.readdir dir) + + let list t = walk t [] (key_to_fspath t "") + let list_prefix t prefix = - let rec aux acc dir = - match Sys.readdir dir with - | [||] -> acc - | xs -> - List.concat_map - (fun x -> - match Filename.concat dir x with - | p when Sys.is_directory p -> aux acc p - | p -> - let k = fspath_to_key t p in - if String.starts_with ~prefix k then k :: acc else acc) - @@ Array.to_list xs - in aux [] @@ key_to_fspath t "" + walk t [] (key_to_fspath t prefix) let erase_prefix t pre = List.iter (erase t) @@ list_prefix t pre From 02b7028a9c79ec37019fac3ddbcd493a8d6e7de6 Mon Sep 17 00:00:00 2001 From: Zolisa Bleki Date: Sun, 1 Sep 2024 03:03:18 +0200 Subject: [PATCH 2/2] Improve list_dir implementation for FilesystemStore. Avoids redundant work to obtain children of a directory, which is what `list_dir` does in a FilesystemStore. --- zarr-eio/src/storage.ml | 24 +++++++----------------- zarr-lwt/src/storage.ml | 31 +++++++++++-------------------- zarr-sync/src/storage.ml | 26 +++++++------------------- zarr-sync/test/test_sync.ml | 9 +++++---- zarr/src/storage/memory.ml | 10 +++++----- zarr/src/storage/storage.ml | 19 ++++++++++++------- 6 files changed, 47 insertions(+), 72 deletions(-) diff --git a/zarr-eio/src/storage.ml b/zarr-eio/src/storage.ml index 0bfa79b..b87c8d8 100644 --- a/zarr-eio/src/storage.ml +++ b/zarr-eio/src/storage.ml @@ -82,23 +82,13 @@ module FilesystemStore = struct else Eio.Path.rmtree ~missing_ok:true prefix let list_dir t prefix = - let module S = Zarr.Util.StrSet in - let n = String.length prefix in - let rec aux acc dir = - List.fold_left - (fun ((l, r) as a) x -> - match Eio.Path.(dir / x) with - | p when Eio.Path.is_directory p -> aux a p - | p -> - let key = fspath_to_key t p in - let pred = String.starts_with ~prefix key in - match key with - | k when pred && String.contains_from k n '/' -> - S.add String.(sub k 0 @@ 1 + index_from k n '/') l, r - | k when pred -> l, k :: r - | _ -> a) acc (Eio.Path.read_dir dir) in - let prefs, keys = aux (S.empty, []) t.root in - keys, S.elements prefs + let dir = key_to_fspath t prefix in + List.partition_map + (fun x -> + match Eio.Path.(dir / x) with + | p when Eio.Path.is_directory p -> + Either.right @@ (fspath_to_key t p) ^ "/" + | p -> Either.left @@ fspath_to_key t p) (Eio.Path.read_dir dir) end module U = Zarr.Util diff --git a/zarr-lwt/src/storage.ml b/zarr-lwt/src/storage.ml index 95539e3..c5a12e4 100644 --- a/zarr-lwt/src/storage.ml +++ b/zarr-lwt/src/storage.ml @@ -113,26 +113,17 @@ module FilesystemStore = struct list_prefix t pre >>= Lwt_list.iter_s @@ erase t let list_dir t prefix = - let module S = Zarr.Util.StrSet in - let n = String.length prefix in - let rec filter_concat acc dir = - Lwt_stream.fold_s - (fun x ((l, r) as a) -> - if x = "." || x = ".." then Lwt.return a else - match Filename.concat dir x with - | p when Sys.is_directory p -> filter_concat a p - | p -> - let key = fspath_to_key t p in - let pred = String.starts_with ~prefix key in - match key with - | k when pred && String.contains_from k n '/' -> - Lwt.return (S.add String.(sub k 0 @@ 1 + index_from k n '/') l, r) - | k when pred -> Lwt.return (l, k :: r) - | _ -> Lwt.return a) - (Lwt_unix.files_of_directory dir) acc - in - let+ y, x = filter_concat (S.empty, []) @@ key_to_fspath t "" in - x, S.elements y + let dir = key_to_fspath t prefix in + let+ files = + Lwt_stream.to_list @@ Lwt_stream.filter + (fun x -> if x = "." || x = ".." then false else true) + (Lwt_unix.files_of_directory dir) in + List.partition_map + (fun x -> + match Filename.concat dir x with + | p when Sys.is_directory p -> + Either.right @@ (fspath_to_key t p) ^ "/" + | p -> Either.left @@ fspath_to_key t p) files end module U = Zarr.Util diff --git a/zarr-sync/src/storage.ml b/zarr-sync/src/storage.ml index 6cdbc0d..a48d324 100644 --- a/zarr-sync/src/storage.ml +++ b/zarr-sync/src/storage.ml @@ -86,25 +86,13 @@ module FilesystemStore = struct List.iter (erase t) @@ list_prefix t pre let list_dir t prefix = - let module StrSet = Zarr.Util.StrSet in - let n = String.length prefix in - let rec aux acc dir = - let xs = Sys.readdir dir in - List.fold_left - (fun ((l, r) as a) x -> - match Filename.concat dir x with - | p when Sys.is_directory p -> aux a p - | p -> - let key = fspath_to_key t p in - let pred = String.starts_with ~prefix key in - match key with - | k when pred && String.contains_from k n '/' -> - StrSet.add String.(sub k 0 @@ 1 + index_from k n '/') l, r - | k when pred -> l, k :: r - | _ -> a) acc (Array.to_list xs) - in - let prefs, keys = aux (StrSet.empty, []) @@ key_to_fspath t "" in - keys, StrSet.elements prefs + let dir = key_to_fspath t prefix in + List.partition_map + (fun x -> + match Filename.concat dir x with + | p when Sys.is_directory p -> + Either.right @@ (fspath_to_key t p) ^ "/" + | p -> Either.left @@ fspath_to_key t p) (Array.to_list @@ Sys.readdir dir) end module U = Zarr.Util diff --git a/zarr-sync/test/test_sync.ml b/zarr-sync/test/test_sync.ml index 02f94ae..1a04a9e 100644 --- a/zarr-sync/test/test_sync.ml +++ b/zarr-sync/test/test_sync.ml @@ -107,7 +107,7 @@ let test_storage (Zarr.Storage.Invalid_data_type) (fun () -> write_array store anode slice badarray); - let child = GroupNode.of_path "/some/child" in + let child = GroupNode.of_path "/some/child/group" in create_group store child; let arrays, groups = find_child_nodes store gnode in assert_equal @@ -115,15 +115,16 @@ let test_storage assert_equal ~printer:string_of_list ["/some"] (List.map GroupNode.to_path groups); - let c = find_child_nodes store @@ GroupNode.(root / "fakegroup") in - assert_equal ([], []) c; + assert_equal ([], []) @@ find_child_nodes store child; + assert_equal ([], []) @@ find_child_nodes store GroupNode.(root / "fakegroup"); let ac, gc = find_all_nodes store in let got = List.fast_sort String.compare @@ List.map ArrayNode.show ac @ List.map GroupNode.show gc in assert_equal - ~printer:string_of_list ["/"; "/arrnode"; "/some"; "/some/child"] got; + ~printer:string_of_list + ["/"; "/arrnode"; "/some"; "/some/child"; "/some/child/group"] got; let nshape = [|25; 32; 10|] in reshape store anode nshape; diff --git a/zarr/src/storage/memory.ml b/zarr/src/storage/memory.ml index eebcac1..d33a6b0 100644 --- a/zarr/src/storage/memory.ml +++ b/zarr/src/storage/memory.ml @@ -68,17 +68,17 @@ module Make (Deferred : Types.Deferred) = struct else Deferred.return_unit let list_dir t prefix = + let m = Atomic.get t in let module S = Util.StrSet in - let+ xs = list t in let n = String.length prefix in let prefs, keys = - List.fold_left - (fun ((l, r) as a) key -> + StrMap.fold + (fun key _ ((l, r) as a) -> let pred = String.starts_with ~prefix key in match key with | k when pred && String.contains_from k n '/' -> let l' = S.add String.(sub k 0 @@ 1 + index_from k n '/') l in l', r | k when pred -> l, k :: r - | _ -> a) (S.empty, []) xs - in keys, S.elements prefs + | _ -> a) m (S.empty, []) + in Deferred.return (keys, S.elements prefs) end diff --git a/zarr/src/storage/storage.ml b/zarr/src/storage/storage.ml index 2a0c802..6860d13 100644 --- a/zarr/src/storage/storage.ml +++ b/zarr/src/storage/storage.ml @@ -59,13 +59,18 @@ module Make (Io : Types.IO) = struct Util.to_string @@ Util.member "node_type" @@ from_string s let find_child_nodes t node = - let* _, gp = list_dir t @@ GroupNode.to_prefix node in - Deferred.fold_left - (fun (l, r) pre -> - let+ nt = node_type t @@ pre ^ "zarr.json" in - let p = "/" ^ String.(length pre - 1 |> sub pre 0) in - if nt = "array" then ArrayNode.of_path p :: l, r - else l, GroupNode.of_path p :: r) ([], []) gp + let prefix = GroupNode.to_prefix node in + let res = [], [] in + is_member t @@ prefix ^ "zarr.json" >>= function + | false -> Deferred.return res + | true -> + let* _, ps = list_dir t prefix in + Deferred.fold_left + (fun (l, r) pre -> + let+ nt = node_type t @@ pre ^ "zarr.json" in + let p = "/" ^ String.(length pre - 1 |> sub pre 0) in + if nt = "array" then ArrayNode.of_path p :: l, r + else l, GroupNode.of_path p :: r) res ps let find_all_nodes t = let* keys = list t in