Skip to content

Commit

Permalink
Reduce code duplication in Codecs module.
Browse files Browse the repository at this point in the history
This is achieved by using the interface trick so that type definitions
are contained in one place.
  • Loading branch information
zoj613 committed Jul 13, 2024
1 parent ba58aa9 commit 71216f8
Show file tree
Hide file tree
Showing 9 changed files with 124 additions and 176 deletions.
8 changes: 2 additions & 6 deletions lib/codecs/array_to_array.ml
Original file line number Diff line number Diff line change
@@ -1,10 +1,6 @@
module Ndarray = Owl.Dense.Ndarray.Generic

type arraytoarray =
[ `Transpose of int array ]
open Codecs_intf

type error =
[ `Transpose_order of int array * string ]
module Ndarray = Owl.Dense.Ndarray.Generic

(* https://zarr-specs.readthedocs.io/en/latest/v3/codecs/transpose/v1.0.html *)
module TransposeCodec = struct
Expand Down
8 changes: 2 additions & 6 deletions lib/codecs/array_to_array.mli
Original file line number Diff line number Diff line change
@@ -1,10 +1,6 @@
module Ndarray = Owl.Dense.Ndarray.Generic

type arraytoarray =
[ `Transpose of int array ]
open Codecs_intf

type error =
[ `Transpose_order of int array * string ]
module Ndarray = Owl.Dense.Ndarray.Generic

module ArrayToArray : sig
val parse
Expand Down
26 changes: 1 addition & 25 deletions lib/codecs/array_to_bytes.ml
Original file line number Diff line number Diff line change
@@ -1,34 +1,10 @@
open Array_to_array
open Bytes_to_bytes
open Util.Result_syntax
open Codecs_intf

module Ndarray = Owl.Dense.Ndarray.Generic

type endianness = Little | Big

type loc = Start | End

type arraytobytes =
[ `Bytes of endianness
| `ShardingIndexed of shard_config ]

and shard_config =
{chunk_shape : int array
;codecs : bytestobytes shard_chain
;index_codecs : fixed_bytestobytes shard_chain
;index_location : loc}

and 'a shard_chain =
{a2a: arraytoarray list
;a2b: arraytobytes
;b2b: 'a list}

type error =
[ Extensions.error
| Array_to_array.error
| Bytes_to_bytes.error
| `Sharding of int array * int array * string ]

(* https://zarr-specs.readthedocs.io/en/latest/v3/codecs/bytes/v1.0.html *)
module BytesCodec = struct
let compute_encoded_size (input_size : int) = input_size
Expand Down
28 changes: 1 addition & 27 deletions lib/codecs/array_to_bytes.mli
Original file line number Diff line number Diff line change
@@ -1,33 +1,7 @@
open Array_to_array
open Bytes_to_bytes
open Codecs_intf

module Ndarray = Owl.Dense.Ndarray.Generic

type endianness = Little | Big

type loc = Start | End

type arraytobytes =
[ `Bytes of endianness
| `ShardingIndexed of shard_config ]

and shard_config =
{chunk_shape : int array
;codecs : bytestobytes shard_chain
;index_codecs : fixed_bytestobytes shard_chain
;index_location : loc}

and 'a shard_chain =
{a2a: arraytoarray list
;a2b: arraytobytes
;b2b: 'a list}

type error =
[ Extensions.error
| Array_to_array.error
| Bytes_to_bytes.error
| `Sharding of int array * int array * string ]

module ArrayToBytes : sig
val parse
: ('a, 'b) Util.array_repr ->
Expand Down
17 changes: 2 additions & 15 deletions lib/codecs/bytes_to_bytes.ml
Original file line number Diff line number Diff line change
@@ -1,19 +1,6 @@
module Ndarray = Owl.Dense.Ndarray.Generic

type compression_level =
| L0 | L1 | L2 | L3 | L4 | L5 | L6 | L7 | L8 | L9

type fixed_bytestobytes =
[ `Crc32c ]
open Codecs_intf

type variable_bytestobytes =
[ `Gzip of compression_level ]

type bytestobytes =
[ fixed_bytestobytes | variable_bytestobytes ]

type error =
[ `Gzip of Ezgzip.error ]
module Ndarray = Owl.Dense.Ndarray.Generic

(* https://zarr-specs.readthedocs.io/en/latest/v3/codecs/gzip/v1.0.html *)
module GzipCodec = struct
Expand Down
17 changes: 2 additions & 15 deletions lib/codecs/bytes_to_bytes.mli
Original file line number Diff line number Diff line change
@@ -1,19 +1,6 @@
module Ndarray = Owl.Dense.Ndarray.Generic

type compression_level =
| L0 | L1 | L2 | L3 | L4 | L5 | L6 | L7 | L8 | L9

type fixed_bytestobytes =
[ `Crc32c ]
open Codecs_intf

type variable_bytestobytes =
[ `Gzip of compression_level ]

type bytestobytes =
[ fixed_bytestobytes | variable_bytestobytes ]

type error =
[ `Gzip of Ezgzip.error ]
module Ndarray = Owl.Dense.Ndarray.Generic

module BytesToBytes : sig
val compute_encoded_size : int -> fixed_bytestobytes -> int
Expand Down
24 changes: 4 additions & 20 deletions lib/codecs/codecs.ml
Original file line number Diff line number Diff line change
@@ -1,26 +1,14 @@
include Bytes_to_bytes
include Array_to_array
include Array_to_bytes
open Bytes_to_bytes
open Array_to_array
open Array_to_bytes
open Util.Result_syntax

module Ndarray = Owl.Dense.Ndarray.Generic

type error =
[ `Extension of string
| `Gzip of Ezgzip.error
| `Transpose_order of int array * string
| `CodecChain of string
| `Sharding of int array * int array * string ]

type codec_chain =
[ arraytoarray | arraytobytes | bytestobytes ] list
include Codecs_intf

type internal_chain =
{a2a : arraytoarray list
;a2b : arraytobytes
;b2b : bytestobytes list}
(*;b2b_fixed : fixed_bytestobytes list
;b2b_variable : variable_bytestobytes list} *)

module Chain = struct
type t = internal_chain
Expand All @@ -42,10 +30,6 @@ module Chain = struct
| _ ->
Result.error @@ `CodecChain "Must be exactly one array->bytes codec.")
>>= fun (a2b, b2b) ->
(* let b2b_fixed, b2b_variable = List.partition_map (function
| #fixed_bytestobytes as c -> Either.left c
| #variable_bytestobytes as c -> Either.right c) rest
in *)
let ic = {a2a; a2b; b2b} in
List.fold_left
(fun acc c ->
Expand Down
64 changes: 2 additions & 62 deletions lib/codecs/codecs.mli
Original file line number Diff line number Diff line change
Expand Up @@ -3,69 +3,9 @@
This module contains building blocks for creating and working with
a chain of codecs. *)

module Ndarray = Owl.Dense.Ndarray.Generic

(** The type of [array -> array] codecs. *)
type arraytoarray =
[ `Transpose of int array ]

(** A type representing valid Gzip codec compression levels. *)
type compression_level =
| L0 | L1 | L2 | L3 | L4 | L5 | L6 | L7 | L8 | L9

(** A type representing [bytes -> bytes] codecs that produce
fixed sized encoded strings. *)
type fixed_bytestobytes =
[ `Crc32c ]

(** A type representing [bytes -> bytes] codecs that produce
variable sized encoded strings. *)
type variable_bytestobytes =
[ `Gzip of compression_level ]

(** The type of [bytes -> bytes] codecs. *)
type bytestobytes =
[ fixed_bytestobytes | variable_bytestobytes ]

(** A type representing the configured endianness of an array. *)
type endianness = Little | Big

(** A type representing the location of a shard's index array in
an encoded byte string. *)
type loc = Start | End

(** The type of [array -> bytes] codecs. *)
type arraytobytes =
[ `Bytes of endianness
| `ShardingIndexed of shard_config ]

(** A type representing the Sharding indexed codec's configuration parameters. *)
and shard_config =
{chunk_shape : int array
;codecs : bytestobytes shard_chain
;index_codecs : fixed_bytestobytes shard_chain
;index_location : loc}

(** A type representing the chain of codecs used to encode/decode
a shard's bytes and its index array. *)
and 'a shard_chain =
{a2a: arraytoarray list
;a2b: arraytobytes
;b2b: 'a list}

(** A type used to build a user-defined chain of codecs when creating a Zarr array. *)
type codec_chain =
[ arraytoarray | arraytobytes | bytestobytes ] list

(** The type of errors returned upon failure when an calling a function
on a {!Chain} type. *)
type error =
[ `Extension of string
| `Gzip of Ezgzip.error
| `Transpose_order of int array * string
| `CodecChain of string
| `Sharding of int array * int array * string ]
include Codecs_intf.Interface

module Ndarray = Owl.Dense.Ndarray.Generic

(** A module containing functions to encode/decode an array chunk using a
predefined set of codecs. *)
Expand Down
108 changes: 108 additions & 0 deletions lib/codecs/codecs_intf.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
(* Single home for the codec type definitions shared by the codec modules
(Array_to_array, Array_to_bytes, Bytes_to_bytes and Codecs open or include
this module). Every type below is repeated, with the same constructors and
fields, inside the [Interface] module type further down this file; the two
copies must be edited together. *)

(** The type of [array -> array] codecs. *)
type arraytoarray =
[ `Transpose of int array ]

(** A type representing valid Gzip codec compression levels. *)
type compression_level =
| L0 | L1 | L2 | L3 | L4 | L5 | L6 | L7 | L8 | L9

(** A type representing [bytes -> bytes] codecs that produce
fixed-size encoded strings. *)
type fixed_bytestobytes =
[ `Crc32c ]

(** A type representing [bytes -> bytes] codecs that produce
variable-size encoded strings. *)
type variable_bytestobytes =
[ `Gzip of compression_level ]

(** The type of [bytes -> bytes] codecs: the union of the fixed-size and
variable-size families above. *)
type bytestobytes =
[ fixed_bytestobytes | variable_bytestobytes ]

(** A type representing the configured endianness of an array. *)
type endianness = Little | Big

(** A type representing the location of a shard's index array in
an encoded byte string. *)
type loc = Start | End

(** The type of [array -> bytes] codecs. Mutually recursive with
[shard_config] and [shard_chain], because a sharding codec carries its own
inner codec chains. *)
type arraytobytes =
[ `Bytes of endianness
| `ShardingIndexed of shard_config ]

(** A type representing the Sharding indexed codec's configuration parameters.
Note that [index_codecs] only admits fixed-size [bytes -> bytes] codecs,
whereas [codecs] admits any [bytes -> bytes] codec. *)
and shard_config =
{chunk_shape : int array
;codecs : bytestobytes shard_chain
;index_codecs : fixed_bytestobytes shard_chain
;index_location : loc}

(** A type representing the chain of codecs used to encode/decode
a shard's bytes and its index array. The parameter ['a] is the kind of
[bytes -> bytes] codec allowed in the [b2b] stage. *)
and 'a shard_chain =
{a2a: arraytoarray list
;a2b: arraytobytes
;b2b: 'a list}

(** A type used to build a user-defined chain of codecs when creating a
Zarr array. *)
type codec_chain =
[ arraytoarray | arraytobytes | bytestobytes ] list

(** The type of errors returned upon failure by functions operating on a
codec chain (see the [Chain] module in [Codecs]). *)
type error =
[ `Extension of string
| `Gzip of Ezgzip.error
| `Transpose_order of int array * string
| `CodecChain of string
| `Sharding of int array * int array * string ]

module Ndarray = Owl.Dense.Ndarray.Generic

(** Signature listing the public codec types together with their
documentation. It is pulled into [codecs.mli] with
[include Codecs_intf.Interface]. Each declaration here repeats the
top-level type of the same name defined earlier in this file, so the two
must be kept identical. *)
module type Interface = sig
(** The type of [array -> array] codecs. *)
type arraytoarray =
[ `Transpose of int array ]

(** A type representing valid Gzip codec compression levels. *)
type compression_level =
| L0 | L1 | L2 | L3 | L4 | L5 | L6 | L7 | L8 | L9

(** A type representing [bytes -> bytes] codecs that produce
fixed-size encoded strings. *)
type fixed_bytestobytes =
[ `Crc32c ]

(** A type representing [bytes -> bytes] codecs that produce
variable-size encoded strings. *)
type variable_bytestobytes =
[ `Gzip of compression_level ]

(** The type of [bytes -> bytes] codecs: the union of the fixed-size and
variable-size families above. *)
type bytestobytes =
[ fixed_bytestobytes | variable_bytestobytes ]

(** A type representing the configured endianness of an array. *)
type endianness = Little | Big

(** A type representing the location of a shard's index array in
an encoded byte string. *)
type loc = Start | End

(** The type of [array -> bytes] codecs. *)
type arraytobytes =
[ `Bytes of endianness
| `ShardingIndexed of shard_config ]

(** A type representing the Sharding indexed codec's configuration parameters.
Note that [index_codecs] only admits fixed-size [bytes -> bytes] codecs,
whereas [codecs] admits any [bytes -> bytes] codec. *)
and shard_config =
{chunk_shape : int array
;codecs : bytestobytes shard_chain
;index_codecs : fixed_bytestobytes shard_chain
;index_location : loc}

(** A type representing the chain of codecs used to encode/decode
a shard's bytes and its index array. The parameter ['a] is the kind of
[bytes -> bytes] codec allowed in the [b2b] stage. *)
and 'a shard_chain =
{a2a: arraytoarray list
;a2b: arraytobytes
;b2b: 'a list}

(** A type used to build a user-defined chain of codecs when creating a Zarr array. *)
type codec_chain =
[ arraytoarray | arraytobytes | bytestobytes ] list

(** The type of errors returned upon failure when calling a function
on a {!Chain} type. *)
type error =
[ `Extension of string
| `Gzip of Ezgzip.error
| `Transpose_order of int array * string
| `CodecChain of string
| `Sharding of int array * int array * string ]
end

0 comments on commit 71216f8

Please sign in to comment.