Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enforce correct sequencing of codecs in codec chain. #65

Merged
merged 2 commits into from
Aug 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 2 additions & 4 deletions zarr/src/codecs/array_to_bytes.ml
Original file line number Diff line number Diff line change
Expand Up @@ -333,10 +333,8 @@ end = struct
let open Util.Result_syntax in
let filter_partition f encoded =
List.fold_right (fun c (l, r) ->
match f c with
| Ok v -> v :: l, r
| Error _ -> l, c :: r) encoded ([], [])
in
Result.fold ~ok:(fun v -> v :: l, r) ~error:(fun _ -> l, c :: r) @@ f c)
encoded ([], []) in
let* codecs = match codecs with
| [] -> Error "No codec chain specified for sharding_indexed."
| y -> Ok y
Expand Down
86 changes: 38 additions & 48 deletions zarr/src/codecs/codecs.ml
Original file line number Diff line number Diff line change
Expand Up @@ -31,53 +31,45 @@
type t = (arraytobytes, bytestobytes) internal_chain

let rec create : int array -> codec_chain -> t = fun shape cc ->
let a2a, rest =
List.partition_map
(function
| #arraytoarray as c -> Either.left c
| #array_tobytes as c -> Either.right c
| #bytestobytes as c -> Either.right c) cc
let rec extract_a2a (l, r) = match r with
| #arraytoarray as x :: xs -> extract_a2a (l @ [x], xs)
| xs -> (l, xs)
in
let result =
List.fold_right
(fun c (l, r) ->
match c with
| #bytestobytes as c -> l, c :: r
| #fixed_arraytobytes as c -> c :: l, r
| `ShardingIndexed cfg ->
let codecs = create shape cfg.codecs in
let index_codecs =
create
(Array.append shape [|2|])
(cfg.index_codecs :> codec_chain) in
(* coerce to a fixed codec internal_chain list type *)
let b2b =
fst @@
List.partition_map
(function
| #fixed_bytestobytes as c -> Either.left c
| c -> Either.right c) index_codecs.b2b
in
let a2b =
List.hd @@
fst @@
List.partition_map
(function
| #fixed_arraytobytes as c -> Either.left c
| c -> Either.right c) [index_codecs.a2b]
in
let cfg' : internal_shard_config =
{codecs
;chunk_shape = cfg.chunk_shape
;index_location = cfg.index_location
;index_codecs = {index_codecs with a2b; b2b}}
in `ShardingIndexed cfg' :: l, r) rest ([], [])
let extract_a2b = function
| #fixed_arraytobytes as x :: xs -> (x, xs)
| #variable_array_tobytes as x :: xs ->
begin match x with
| `ShardingIndexed cfg ->
let codecs = create shape cfg.codecs in
let index_codecs = create
(Array.append shape [|2|])
(cfg.index_codecs :> codec_chain) in
(* coerce to a fixed codec internal_chain list type *)
let b2b = List.filter_map (function
| #fixed_bytestobytes as c -> Some c
| _ -> None) index_codecs.b2b

Check warning on line 50 in zarr/src/codecs/codecs.ml

View check run for this annotation

Codecov / codecov/patch

zarr/src/codecs/codecs.ml#L50

Added line #L50 was not covered by tests
in
let a2b = match index_codecs.a2b with
| #fixed_arraytobytes as c -> c
| _ -> raise Array_to_bytes_invariant

Check warning on line 54 in zarr/src/codecs/codecs.ml

View check run for this annotation

Codecov / codecov/patch

zarr/src/codecs/codecs.ml#L54

Added line #L54 was not covered by tests
in
let cfg' : internal_shard_config =
{codecs
;chunk_shape = cfg.chunk_shape
;index_location = cfg.index_location
;index_codecs = {index_codecs with a2b; b2b}}
in (`ShardingIndexed cfg', xs)
end
| _ -> raise Array_to_bytes_invariant
in
let a2b, b2b =
match result with
| [x], r -> x, r
| _ -> raise Bytes_to_bytes_invariant
let rec extract_b2b (l, r) = match r with
| #bytestobytes as x :: xs -> extract_b2b (l @ [x], xs)
| xs -> (l, xs)
in
let a2a, rest = extract_a2a ([], cc) in
let a2b, rest = extract_a2b rest in
let b2b, other = extract_b2b ([], rest) in
if List.compare_length_with other 0 <> 0 then raise Invalid_codec_ordering else
ArrayToBytes.parse a2b @@
(match a2a with
| [] -> shape
Expand Down Expand Up @@ -112,10 +104,8 @@
let open Util.Result_syntax in
let filter_partition f encoded =
List.fold_right (fun c (l, r) ->
match f c with
| Ok v -> v :: l, r
| Error _ -> l, c :: r) encoded ([], [])
in
Result.fold ~ok:(fun v -> v :: l, r) ~error:(fun _ -> l, c :: r) @@ f c)
encoded ([], []) in
let* codecs = match Yojson.Safe.Util.to_list x with
| [] -> Error "No codec specified."
| y -> Ok y
Expand Down
11 changes: 8 additions & 3 deletions zarr/src/codecs/codecs_intf.ml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
exception Bytes_to_bytes_invariant
exception Array_to_bytes_invariant
exception Invalid_transpose_order
exception Invalid_sharding_chunk_shape
exception Invalid_codec_ordering

type arraytoarray =
[ `Transpose of int array ]
Expand Down Expand Up @@ -48,15 +49,19 @@ type ('a, 'b) array_repr =
;shape : int array}

module type Interface = sig
exception Bytes_to_bytes_invariant
(** raised when a codec chain contains more than 1 bytes->bytes codec. *)
exception Array_to_bytes_invariant
(** raised when a codec chain has no array->bytes codec directly after its (possibly empty) run of array->array codecs. *)

exception Invalid_transpose_order
(** raised when a codec chain contains a Transpose codec with an incorrect order. *)

exception Invalid_sharding_chunk_shape
(** raised when a codec chain contains a shardingindexed codec with an incorrect inner chunk shape. *)

exception Invalid_codec_ordering
(** raised when a codec chain has incorrect ordering of codecs, i.e. if the
ordering is not [arraytoarray list -> 1 arraytobytes -> bytestobytes list]. *)

(** The type of [array -> array] codecs. *)
type arraytoarray =
[ `Transpose of int array ]
Expand Down
9 changes: 7 additions & 2 deletions zarr/test/test_codecs.ml
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,14 @@ let tests = [
(Zarr.Codecs.Invalid_transpose_order)
(fun () -> Chain.create shape chain);

let chain = [`Transpose [|2; 1; 0|]; `ShardingIndexed shard_cfg; `Bytes BE] in
let chain = [`ShardingIndexed shard_cfg; `Transpose [|2; 1; 0|]; `Gzip L0] in
assert_raises
(Zarr.Codecs.Bytes_to_bytes_invariant)
(Zarr.Codecs.Invalid_codec_ordering)
(fun () -> Chain.create shape chain);

let chain = [`Transpose [|2; 1; 0|]; `Crc32c] in
assert_raises
(Zarr.Codecs.Array_to_bytes_invariant)
(fun () -> Chain.create shape chain);

let chain =
Expand Down
Loading