diff --git a/doc/dune b/doc/dune new file mode 100644 index 0000000..d040c7b --- /dev/null +++ b/doc/dune @@ -0,0 +1,2 @@ +(documentation + (package zarr)) diff --git a/doc/index.mld b/doc/index.mld new file mode 100644 index 0000000..4a7c41b --- /dev/null +++ b/doc/index.mld @@ -0,0 +1,19 @@ +{0 The [zarr] library} + +The Zarr library provides an OCaml implementation of the Zarr version 3 +storage format specification for chunked & compressed multi-dimensional +arrays, designed for use in parallel computing. The storage format is used +by many organizations including Google, NASA, Microsoft and {{:https://zarr.dev/adopters/}many others}. +Zarr's goal is to provide the following features: +- Chunk multi-dimensional arrays along any dimension. +- Store arrays in memory, on disk, inside a Zip file or any remote storage backend. +- Read and write arrays concurrently from multiple threads or processes. +- Organize arrays into hierarchies using groups. + +See {{:https://zarr-specs.readthedocs.io/en/latest/v3/core/v3.0.html}Zarr V3 specification}. + +Author: Zolisa Bleki + +{1 Entry Point} + +The entry point of this library is the module {!Zarr}. 
diff --git a/dune-project b/dune-project index f221357..a536fec 100644 --- a/dune-project +++ b/dune-project @@ -2,6 +2,8 @@ (name zarr) +(version 0.1.0) + (generate_opam_files true) (source @@ -13,11 +15,11 @@ (license BSD-3-Clause) -(documentation https://zoj613.github.io/zarr-ml/zarr/Zarr/index.html) +(documentation https://zoj613.github.io/zarr-ml) (package (name zarr) - (synopsis "A short synopsis") + (synopsis "An Ocaml implementation of the Zarr V3 specification.") (description "A longer description") (depends dune @@ -33,6 +35,6 @@ (bisect_ppx (and :dev (>= 2.5.0) :with-test))) (tags - (topics "to describe" your project))) + ("zarr" "chunked arrays" "zarr version 3"))) ; See the complete stanza docs at https://dune.readthedocs.io/en/stable/dune-files.html#dune-project diff --git a/lib/zarr.mli b/lib/zarr.mli index 3ba672b..385e766 100644 --- a/lib/zarr.mli +++ b/lib/zarr.mli @@ -1,6 +1,132 @@ +(* Copyright (c) 2024, Zolisa Bleki + + SPDX-License-Identifier: BSD-3-Clause *) + +(** + [zarr] provides an OCaml implementation of the Zarr version 3 storage + format specification. It supports creation of arrays and groups as well + as chunking arrays along any dimension. One can store a Zarr hierarchy in + memory or on disk. Zarr also supports reading zarr hierarchies created using + other implementations, as long as they are spec-compliant. + + Consult the {{!examples}examples} and {{!limitations}limitations} for more info. 
+ + {3 References} + {ul + {- {{:https://zarr-specs.readthedocs.io/en/latest/v3/core/v3.0.html}The Zarr Version 3 specification.}} + {- {{:https://zarr.dev/}Zarr community site.}} + } + *) + +(** {1 Node} *) + module Node = Node -module Indexing = Util.Indexing + +(** {1 Metadata} *) + module ArrayMetadata = Metadata.ArrayMetadata module GroupMetadata = Metadata.GroupMetadata + +(** {1 Storage} *) + module Storage = Storage + +(** {1 Codecs} *) + module Codecs = Codecs + +(** {1 Indexing} *) + +module Indexing = Util.Indexing + +(** {1:examples Examples} + + {2:create_array Create, read & write array.} + {@ocaml[ + open Zarr + open Zarr.Node + open Zarr.Codecs + open Zarr.Storage + + let store = + Result.get_ok @@ FilesystemStore.open_or_create "testdata.zarr" in + let group_node = Result.get_ok @@ GroupNode.of_path "/some/group" in + FilesystemStore.create_group store group_node; + let array_node = Result.get_ok @@ ArrayNode.(group_node / "name") in + FilesystemStore.create_array + ~codecs:[`Transpose [|2; 0; 1|]; `Bytes BE; `Gzip L2] + ~shape:[|100; 100; 50|] + ~chunks:[|10; 15; 20|] + Bigarray.Float32 + Float.neg_infinity + array_node + store; + let slice = Owl_types.[|R [0; 20]; I 10; R []|] in + let x = + Result.get_ok @@ + FilesystemStore.get_array store array_node slice Bigarray.Float32 in + let x' = + Owl.Dense.Ndarray.Generic.map + (fun _ -> Owl_stats_dist.uniform_rvs 0. 10.) 
x in + FilesystemStore.set_array store array_node slice x'; + ]} + + {2:sharding Using sharding codec.} + {@ocaml[ + let config = + {chunk_shape = [|5; 3; 5|] + ;codecs = [`Transpose [|2; 0; 1|]; `Bytes LE; `Gzip L5] + ;index_codecs = [`Bytes BE; `Crc32c] + ;index_location = Start} in + let shard_node = Result.get_ok @@ ArrayNode.(group_node / "another") in + FilesystemStore.create_array + ~codecs:[`ShardingIndexed config] + ~shape:[|100; 100; 50|] + ~chunks:[|10; 15; 20|] + Bigarray.Complex32 + Complex.zero + shard_node + store; + ]} + + {2:explore Explore a Zarr hierarchy.} + Functions to query a zarr hierarchy are provided. These include listing + all nodes, finding children of a group node, resizing an array, deleting + nodes, obtaining metadata of a node, and more. + {@ocaml[ + let a, g = FilesystemStore.find_all_nodes store in + FilesystemStore.reshape store array_node [|25; 32; 10|]; + let meta = + Result.get_ok @@ FilesystemStore.group_metadata store group_node in + GroupMetadata.show meta; + FilesystemStore.array_exists store shard_node; + let a, g = FilesystemStore.find_child_nodes store group_node in + FilesystemStore.erase_group_node store group_node; + ]} + + *) + +(** {1:extensions Extension Points} + + This library also provides custom extensions not defined in the version 3 + specification. 
These are tabulated below: + {table + {tr + {th Extension Point} + {th Details}} + {tr + {td Data Types} + {td [char], [complex32], [int] (63-bit integer), [nativeint]}} + } + *) + +(** {1:limitations Limitations} + + Although this implementation tries to be spec-compliant, it does come with + a few limitations: + {ul + {- OCaml does not support unsigned integers as array data types + and thus this library cannot support reading values of datatypes + [uint32], [uint64] and [complex128].} + } + *) diff --git a/zarr.opam b/zarr.opam index 1d5cd3b..46b39d3 100644 --- a/zarr.opam +++ b/zarr.opam @@ -1,13 +1,14 @@ # This file is generated by dune, edit dune-project instead opam-version: "2.0" -synopsis: "A short synopsis" +version: "0.1.0" +synopsis: "An Ocaml implementation of the Zarr V3 specification." description: "A longer description" maintainer: ["Zolisa Bleki"] authors: ["Zolisa Bleki"] license: "BSD-3-Clause" -tags: ["topics" "to describe" "your" "project"] +tags: ["zarr" "chunked arrays" "zarr version 3"] homepage: "https://github.com/zoj613/zarr-ml" -doc: "https://zoj613.github.io/zarr-ml/zarr/Zarr/index.html" +doc: "https://zoj613.github.io/zarr-ml" bug-reports: "https://github.com/zoj613/zarr-ml/issues" depends: [ "dune" {>= "3.15"}