From 71216f8600db856dbeb06cd7f740253739bd1e60 Mon Sep 17 00:00:00 2001 From: Zolisa Bleki Date: Sat, 13 Jul 2024 12:41:59 +0200 Subject: [PATCH] Reduce code duplication in Codecs module. This is achieved by using the interface trick so that type definitions are contained in one place. --- lib/codecs/array_to_array.ml | 8 +-- lib/codecs/array_to_array.mli | 8 +-- lib/codecs/array_to_bytes.ml | 26 +------- lib/codecs/array_to_bytes.mli | 28 +-------- lib/codecs/bytes_to_bytes.ml | 17 +----- lib/codecs/bytes_to_bytes.mli | 17 +----- lib/codecs/codecs.ml | 24 ++------ lib/codecs/codecs.mli | 64 +------------------- lib/codecs/codecs_intf.ml | 108 ++++++++++++++++++++++++++++++++++ 9 files changed, 124 insertions(+), 176 deletions(-) create mode 100644 lib/codecs/codecs_intf.ml diff --git a/lib/codecs/array_to_array.ml b/lib/codecs/array_to_array.ml index 930d7dc..4235675 100644 --- a/lib/codecs/array_to_array.ml +++ b/lib/codecs/array_to_array.ml @@ -1,10 +1,6 @@ -module Ndarray = Owl.Dense.Ndarray.Generic - -type arraytoarray = - [ `Transpose of int array ] +open Codecs_intf -type error = - [ `Transpose_order of int array * string ] +module Ndarray = Owl.Dense.Ndarray.Generic (* https://zarr-specs.readthedocs.io/en/latest/v3/codecs/transpose/v1.0.html *) module TransposeCodec = struct diff --git a/lib/codecs/array_to_array.mli b/lib/codecs/array_to_array.mli index 47ff6b6..2f0997e 100644 --- a/lib/codecs/array_to_array.mli +++ b/lib/codecs/array_to_array.mli @@ -1,10 +1,6 @@ -module Ndarray = Owl.Dense.Ndarray.Generic - -type arraytoarray = - [ `Transpose of int array ] +open Codecs_intf -type error = - [ `Transpose_order of int array * string ] +module Ndarray = Owl.Dense.Ndarray.Generic module ArrayToArray : sig val parse diff --git a/lib/codecs/array_to_bytes.ml b/lib/codecs/array_to_bytes.ml index 2e64fee..49d4faa 100644 --- a/lib/codecs/array_to_bytes.ml +++ b/lib/codecs/array_to_bytes.ml @@ -1,34 +1,10 @@ open Array_to_array open Bytes_to_bytes open 
Util.Result_syntax +open Codecs_intf module Ndarray = Owl.Dense.Ndarray.Generic -type endianness = Little | Big - -type loc = Start | End - -type arraytobytes = - [ `Bytes of endianness - | `ShardingIndexed of shard_config ] - -and shard_config = - {chunk_shape : int array - ;codecs : bytestobytes shard_chain - ;index_codecs : fixed_bytestobytes shard_chain - ;index_location : loc} - -and 'a shard_chain = - {a2a: arraytoarray list - ;a2b: arraytobytes - ;b2b: 'a list} - -type error = - [ Extensions.error - | Array_to_array.error - | Bytes_to_bytes.error - | `Sharding of int array * int array * string ] - (* https://zarr-specs.readthedocs.io/en/latest/v3/codecs/bytes/v1.0.html *) module BytesCodec = struct let compute_encoded_size (input_size : int) = input_size diff --git a/lib/codecs/array_to_bytes.mli b/lib/codecs/array_to_bytes.mli index 071c2f6..91ddf1f 100644 --- a/lib/codecs/array_to_bytes.mli +++ b/lib/codecs/array_to_bytes.mli @@ -1,33 +1,7 @@ -open Array_to_array -open Bytes_to_bytes +open Codecs_intf module Ndarray = Owl.Dense.Ndarray.Generic -type endianness = Little | Big - -type loc = Start | End - -type arraytobytes = - [ `Bytes of endianness - | `ShardingIndexed of shard_config ] - -and shard_config = - {chunk_shape : int array - ;codecs : bytestobytes shard_chain - ;index_codecs : fixed_bytestobytes shard_chain - ;index_location : loc} - -and 'a shard_chain = - {a2a: arraytoarray list - ;a2b: arraytobytes - ;b2b: 'a list} - -type error = - [ Extensions.error - | Array_to_array.error - | Bytes_to_bytes.error - | `Sharding of int array * int array * string ] - module ArrayToBytes : sig val parse : ('a, 'b) Util.array_repr -> diff --git a/lib/codecs/bytes_to_bytes.ml b/lib/codecs/bytes_to_bytes.ml index 489d87e..867341f 100644 --- a/lib/codecs/bytes_to_bytes.ml +++ b/lib/codecs/bytes_to_bytes.ml @@ -1,19 +1,6 @@ -module Ndarray = Owl.Dense.Ndarray.Generic - -type compression_level = - | L0 | L1 | L2 | L3 | L4 | L5 | L6 | L7 | L8 | L9 - -type 
fixed_bytestobytes = - [ `Crc32c ] +open Codecs_intf -type variable_bytestobytes = - [ `Gzip of compression_level ] - -type bytestobytes = - [ fixed_bytestobytes | variable_bytestobytes ] - -type error = - [ `Gzip of Ezgzip.error ] +module Ndarray = Owl.Dense.Ndarray.Generic (* https://zarr-specs.readthedocs.io/en/latest/v3/codecs/gzip/v1.0.html *) module GzipCodec = struct diff --git a/lib/codecs/bytes_to_bytes.mli b/lib/codecs/bytes_to_bytes.mli index e1cf957..2dd203b 100644 --- a/lib/codecs/bytes_to_bytes.mli +++ b/lib/codecs/bytes_to_bytes.mli @@ -1,19 +1,6 @@ -module Ndarray = Owl.Dense.Ndarray.Generic - -type compression_level = - | L0 | L1 | L2 | L3 | L4 | L5 | L6 | L7 | L8 | L9 - -type fixed_bytestobytes = - [ `Crc32c ] +open Codecs_intf -type variable_bytestobytes = - [ `Gzip of compression_level ] - -type bytestobytes = - [ fixed_bytestobytes | variable_bytestobytes ] - -type error = - [ `Gzip of Ezgzip.error ] +module Ndarray = Owl.Dense.Ndarray.Generic module BytesToBytes : sig val compute_encoded_size : int -> fixed_bytestobytes -> int diff --git a/lib/codecs/codecs.ml b/lib/codecs/codecs.ml index ff5246f..df64d65 100644 --- a/lib/codecs/codecs.ml +++ b/lib/codecs/codecs.ml @@ -1,26 +1,14 @@ -include Bytes_to_bytes -include Array_to_array -include Array_to_bytes +open Bytes_to_bytes +open Array_to_array +open Array_to_bytes open Util.Result_syntax -module Ndarray = Owl.Dense.Ndarray.Generic - -type error = - [ `Extension of string - | `Gzip of Ezgzip.error - | `Transpose_order of int array * string - | `CodecChain of string - | `Sharding of int array * int array * string ] - -type codec_chain = - [ arraytoarray | arraytobytes | bytestobytes ] list +include Codecs_intf type internal_chain = {a2a : arraytoarray list ;a2b : arraytobytes ;b2b : bytestobytes list} - (*;b2b_fixed : fixed_bytestobytes list - ;b2b_variable : variable_bytestobytes list} *) module Chain = struct type t = internal_chain @@ -42,10 +30,6 @@ module Chain = struct | _ -> Result.error 
@@ `CodecChain "Must be exactly one array->bytes codec.") >>= fun (a2b, b2b) -> - (* let b2b_fixed, b2b_variable = List.partition_map (function - | #fixed_bytestobytes as c -> Either.left c - | #variable_bytestobytes as c -> Either.right c) rest - in *) let ic = {a2a; a2b; b2b} in List.fold_left (fun acc c -> diff --git a/lib/codecs/codecs.mli b/lib/codecs/codecs.mli index 5e70c9c..1752413 100644 --- a/lib/codecs/codecs.mli +++ b/lib/codecs/codecs.mli @@ -3,69 +3,9 @@ This module contains building blocks for creating and working with a chain of codecs. *) -module Ndarray = Owl.Dense.Ndarray.Generic - -(** The type of [array -> array] codecs. *) -type arraytoarray = - [ `Transpose of int array ] - -(** A type representing valid Gzip codec compression levels. *) -type compression_level = - | L0 | L1 | L2 | L3 | L4 | L5 | L6 | L7 | L8 | L9 - -(** A type representing [bytes -> bytes] codecs that produce - fixed sized encoded strings. *) -type fixed_bytestobytes = - [ `Crc32c ] - -(** A type representing [bytes -> bytes] codecs that produce - variable sized encoded strings. *) -type variable_bytestobytes = - [ `Gzip of compression_level ] - -(** The type of [bytes -> bytes] codecs. *) -type bytestobytes = - [ fixed_bytestobytes | variable_bytestobytes ] - -(** A type representing the configured endianness of an array. *) -type endianness = Little | Big - -(** A type representing the location of a shard's index array in - an encoded byte string. *) -type loc = Start | End - -(** The type of [array -> bytes] codecs. *) -type arraytobytes = - [ `Bytes of endianness - | `ShardingIndexed of shard_config ] - -(** A type representing the Sharding indexed codec's configuration parameters. *) -and shard_config = - {chunk_shape : int array - ;codecs : bytestobytes shard_chain - ;index_codecs : fixed_bytestobytes shard_chain - ;index_location : loc} - -(** A type representing the chain of codecs used to encode/decode - a shard's bytes and its index array. 
*) -and 'a shard_chain = - {a2a: arraytoarray list - ;a2b: arraytobytes - ;b2b: 'a list} - -(** A type used to build a user-defined chain of codecs when creating a Zarr array. *) -type codec_chain = - [ arraytoarray | arraytobytes | bytestobytes ] list - -(** The type of errors returned upon failure when an calling a function - on a {!Chain} type. *) -type error = - [ `Extension of string - | `Gzip of Ezgzip.error - | `Transpose_order of int array * string - | `CodecChain of string - | `Sharding of int array * int array * string ] +include Codecs_intf.Interface +module Ndarray = Owl.Dense.Ndarray.Generic (** A module containing functions to encode/decode an array chunk using a predefined set of codecs. *) diff --git a/lib/codecs/codecs_intf.ml b/lib/codecs/codecs_intf.ml new file mode 100644 index 0000000..11a9e0c --- /dev/null +++ b/lib/codecs/codecs_intf.ml @@ -0,0 +1,108 @@ +type arraytoarray = + [ `Transpose of int array ] + +type compression_level = + | L0 | L1 | L2 | L3 | L4 | L5 | L6 | L7 | L8 | L9 + +type fixed_bytestobytes = + [ `Crc32c ] + +type variable_bytestobytes = + [ `Gzip of compression_level ] + +type bytestobytes = + [ fixed_bytestobytes | variable_bytestobytes ] + +type endianness = Little | Big + +type loc = Start | End + +type arraytobytes = + [ `Bytes of endianness + | `ShardingIndexed of shard_config ] + +and shard_config = + {chunk_shape : int array + ;codecs : bytestobytes shard_chain + ;index_codecs : fixed_bytestobytes shard_chain + ;index_location : loc} + +and 'a shard_chain = + {a2a: arraytoarray list + ;a2b: arraytobytes + ;b2b: 'a list} + +type codec_chain = + [ arraytoarray | arraytobytes | bytestobytes ] list + +type error = + [ `Extension of string + | `Gzip of Ezgzip.error + | `Transpose_order of int array * string + | `CodecChain of string + | `Sharding of int array * int array * string ] + +module Ndarray = Owl.Dense.Ndarray.Generic + +module type Interface = sig + (** The type of [array -> array] codecs. 
*) + type arraytoarray = + [ `Transpose of int array ] + + (** A type representing valid Gzip codec compression levels. *) + type compression_level = + | L0 | L1 | L2 | L3 | L4 | L5 | L6 | L7 | L8 | L9 + + (** A type representing [bytes -> bytes] codecs that produce + fixed sized encoded strings. *) + type fixed_bytestobytes = + [ `Crc32c ] + + (** A type representing [bytes -> bytes] codecs that produce + variable sized encoded strings. *) + type variable_bytestobytes = + [ `Gzip of compression_level ] + + (** The type of [bytes -> bytes] codecs. *) + type bytestobytes = + [ fixed_bytestobytes | variable_bytestobytes ] + + (** A type representing the configured endianness of an array. *) + type endianness = Little | Big + + (** A type representing the location of a shard's index array in + an encoded byte string. *) + type loc = Start | End + + (** The type of [array -> bytes] codecs. *) + type arraytobytes = + [ `Bytes of endianness + | `ShardingIndexed of shard_config ] + + (** A type representing the Sharding indexed codec's configuration parameters. *) + and shard_config = + {chunk_shape : int array + ;codecs : bytestobytes shard_chain + ;index_codecs : fixed_bytestobytes shard_chain + ;index_location : loc} + + (** A type representing the chain of codecs used to encode/decode + a shard's bytes and its index array. *) + and 'a shard_chain = + {a2a: arraytoarray list + ;a2b: arraytobytes + ;b2b: 'a list} + + (** A type used to build a user-defined chain of codecs when creating a Zarr array. *) + type codec_chain = + [ arraytoarray | arraytobytes | bytestobytes ] list + + (** The type of errors returned upon failure when calling a function + on a {!Chain} type. *) + type error = + [ `Extension of string + | `Gzip of Ezgzip.error + | `Transpose_order of int array * string + | `CodecChain of string + | `Sharding of int array * int array * string ] +end