Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make pcodec compatible with numcodecs #43

Merged
merged 1 commit into from
Jul 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- **Breaking**: All `ZfpCodec::new_*` methods now take a `write_header: bool` parameter
- **Breaking**: Add `ArrayMetadataV2ToV3ConversionError::Other`
- Make all v2 metadata available even without experimental codec features
- **Breaking**: Rename the pcodec `max_page_n` configuration parameter to `equal_pages_up_to` to match numcodecs

### Removed
- **Breaking**: Remove `into_array_view` array and codec API
Expand Down
2 changes: 1 addition & 1 deletion doc/status/codecs_experimental.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ Experimental codecs are recommended for evaluation only.
| -------------- | ------------------------ | --- | ------- | ------- | ------------ |
| Array to Array | [bitround] | | ✓ | ✓ | bitround |
| Array to Bytes | [zfp]<br>zfpy (V2) | | &check; | &check; | zfp |
| | [pcodec] | | &check; | | pcodec |
| | [pcodec] | | &check; | &check; | pcodec |
| | [vlen] | | &check; | | |
| | [vlen_v2]<br>vlen-* (V2) | | &check; | &check; | |
| Bytes to Bytes | [bz2] | | &check; | &check; | bz2 |
Expand Down
9 changes: 9 additions & 0 deletions src/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -978,6 +978,15 @@ mod tests {
)
}

// Runs the shared `array_v2_to_v3` helper on the C-order pcodec V2 fixture
// and its V3 counterpart. Only built when the `pcodec` feature is enabled.
#[cfg(feature = "pcodec")]
#[test]
fn array_v2_pcodec_c() {
    let v2_fixture = "tests/data/v2/array_pcodec_C.zarr";
    let v3_fixture = "tests/data/v3/array_pcodec.zarr";
    array_v2_to_v3(v2_fixture, v3_fixture)
}

// fn array_subset_locking(locks: StoreLocks, expect_equal: bool) {
// let store = Arc::new(MemoryStore::new_with_locks(locks));

Expand Down
2 changes: 1 addition & 1 deletion src/array/codec/array_to_bytes/pcodec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ mod tests {
"level": 8,
"delta_encoding_order": 2,
"mode_spec": "auto",
"max_page_n": 262144
"equal_pages_up_to": 262144
}"#;

#[test]
Expand Down
6 changes: 3 additions & 3 deletions src/array/codec/array_to_bytes/pcodec/pcodec_codec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@
.map(|order| order.as_usize()),
)
.with_mode_spec(mode_spec)
.with_paging_spec(PagingSpec::EqualPagesUpTo(configuration.max_page_n))
.with_paging_spec(PagingSpec::EqualPagesUpTo(configuration.equal_pages_up_to))
}

impl PcodecCodec {
Expand All @@ -76,7 +76,7 @@

impl CodecTraits for PcodecCodec {
fn create_metadata_opt(&self, _options: &ArrayMetadataOptions) -> Option<MetadataV3> {
let PagingSpec::EqualPagesUpTo(max_page_n) = self.chunk_config.paging_spec else {
let PagingSpec::EqualPagesUpTo(equal_pages_up_to) = self.chunk_config.paging_spec else {

Check warning on line 79 in src/array/codec/array_to_bytes/pcodec/pcodec_codec.rs

View check run for this annotation

Codecov / codecov/patch

src/array/codec/array_to_bytes/pcodec/pcodec_codec.rs#L79

Added line #L79 was not covered by tests
unreachable!()
};
let configuration = PcodecCodecConfiguration::V1(PcodecCodecConfigurationV1 {
Expand All @@ -86,7 +86,7 @@
.delta_encoding_order
.map(|order| PcodecDeltaEncodingOrder::try_from(order).unwrap()),
mode_spec: mode_spec_pco_to_config(&self.chunk_config.mode_spec),
max_page_n,
equal_pages_up_to,

Check warning on line 89 in src/array/codec/array_to_bytes/pcodec/pcodec_codec.rs

View check run for this annotation

Codecov / codecov/patch

src/array/codec/array_to_bytes/pcodec/pcodec_codec.rs#L89

Added line #L89 was not covered by tests
});

Some(
Expand Down
10 changes: 9 additions & 1 deletion src/metadata/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,14 @@ pub fn array_metadata_v2_to_v3(
)?;
codecs.push(zfp_v3_metadata);
}
super::v3::codec::pcodec::IDENTIFIER => {
// pcodec is v2/v3 compatible
has_array_to_bytes = true;
codecs.push(MetadataV3::new_with_configuration(
compressor.id(),
compressor.configuration().clone(),
));
}
_ => {}
}
}
Expand All @@ -202,7 +210,7 @@ pub fn array_metadata_v2_to_v3(
// Compressor (bytes to bytes codec)
if let Some(compressor) = &array_metadata_v2.compressor {
match compressor.id() {
super::v2::codec::zfpy::IDENTIFIER => {
super::v2::codec::zfpy::IDENTIFIER | super::v3::codec::pcodec::IDENTIFIER => {
// already handled above
}
super::v3::codec::blosc::IDENTIFIER => {
Expand Down
28 changes: 14 additions & 14 deletions src/metadata/v3/codec/pcodec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@
/// See <https://docs.rs/pco/latest/pco/enum.PagingSpec.html#variant.EqualPagesUpTo>.
///
/// The default is `1 << 18`.
pub max_page_n: usize,
pub equal_pages_up_to: usize,
}

/// Specifies how Pco should choose a [`mode`][pco::Mode] to compress this
Expand Down Expand Up @@ -121,8 +121,8 @@
base: Option<UIntOrFloat>,
#[serde(skip_serializing_if = "Option::is_none")]
k: Option<u32>,
#[serde(default = "default_max_page_n")]
max_page_n: usize,
#[serde(default = "default_equal_pages_up_to")]
equal_pages_up_to: usize,
}

impl serde::Serialize for PcodecCodecConfigurationV1 {
Expand Down Expand Up @@ -157,7 +157,7 @@
mode_spec,
base,
k,
max_page_n: self.max_page_n,
equal_pages_up_to: self.equal_pages_up_to,

Check warning on line 160 in src/metadata/v3/codec/pcodec.rs

View check run for this annotation

Codecov / codecov/patch

src/metadata/v3/codec/pcodec.rs#L160

Added line #L160 was not covered by tests
};
config.serialize(s)
}
Expand Down Expand Up @@ -194,7 +194,7 @@
level: config.level,
delta_encoding_order: config.delta_encoding_order,
mode_spec,
max_page_n: config.max_page_n,
equal_pages_up_to: config.equal_pages_up_to,
};
Ok(config)
}
Expand All @@ -206,7 +206,7 @@
level: PcodecCompressionLevel::default(),
delta_encoding_order: None,
mode_spec: PcodecModeSpecConfiguration::Auto,
max_page_n: default_max_page_n(),
equal_pages_up_to: default_equal_pages_up_to(),

Check warning on line 209 in src/metadata/v3/codec/pcodec.rs

View check run for this annotation

Codecov / codecov/patch

src/metadata/v3/codec/pcodec.rs#L209

Added line #L209 was not covered by tests
}
}
}
Expand Down Expand Up @@ -359,7 +359,7 @@
}
}

const fn default_max_page_n() -> usize {
/// Default value for the `equal_pages_up_to` configuration field.
///
/// Evaluates to `1 << 18` (262144).
const fn default_equal_pages_up_to() -> usize {
    // pco::constants::DEFAULT_MAX_PAGE_N
    const DEFAULT_PAGE_N: usize = 1 << 18;
    DEFAULT_PAGE_N
}
Expand Down Expand Up @@ -392,7 +392,7 @@
"level": 8,
"delta_encoding_order": 2,
"mode_spec": "auto",
"max_page_n": 262144
"equal_pages_up_to": 262144
}"#,
)
.unwrap();
Expand All @@ -405,7 +405,7 @@
"level": 8,
"delta_encoding_order": 2,
"mode_spec": "classic",
"max_page_n": 262144
"equal_pages_up_to": 262144
}"#,
)
.unwrap();
Expand All @@ -419,7 +419,7 @@
"delta_encoding_order": 2,
"mode_spec": "try_float_mult",
"base": 0.1,
"max_page_n": 262144
"equal_pages_up_to": 262144
}"#,
)
.unwrap();
Expand All @@ -433,7 +433,7 @@
"delta_encoding_order": 2,
"mode_spec": "try_float_quant",
"k": 1,
"max_page_n": 262144
"equal_pages_up_to": 262144
}"#,
)
.unwrap();
Expand All @@ -447,7 +447,7 @@
"delta_encoding_order": 2,
"mode_spec": "try_int_mult",
"base": 1,
"max_page_n": 262144
"equal_pages_up_to": 262144
}"#,
)
.unwrap();
Expand All @@ -460,7 +460,7 @@
"level": 13,
"delta_encoding_order": 2,
"mode_spec": "auto",
"max_page_n": 262144
"equal_pages_up_to": 262144
}"#,
)
.is_err());
Expand All @@ -473,7 +473,7 @@
"level": 8,
"delta_encoding_order": 8,
"mode_spec": "auto",
"max_page_n": 262144
"equal_pages_up_to": 262144
}"#,
)
.is_err());
Expand Down
22 changes: 22 additions & 0 deletions tests/data/v2/array_pcodec_C.zarr/.zarray
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"chunks": [
5,
5
],
"compressor": {
"delta_encoding_order": null,
"equal_pages_up_to": 262144,
"id": "pcodec",
"level": 8,
"mode_spec": "auto"
},
"dtype": "<f4",
"fill_value": 0.0,
"filters": null,
"order": "C",
"shape": [
10,
10
],
"zarr_format": 2
}
3 changes: 3 additions & 0 deletions tests/data/v2/array_pcodec_C.zarr/.zattrs
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"key": "value"
}
Binary file added tests/data/v2/array_pcodec_C.zarr/0.0
Binary file not shown.
Binary file added tests/data/v2/array_pcodec_C.zarr/0.1
Binary file not shown.
Binary file added tests/data/v2/array_pcodec_C.zarr/1.0
Binary file not shown.
Binary file added tests/data/v2/array_pcodec_C.zarr/1.1
Binary file not shown.
9 changes: 7 additions & 2 deletions tests/data/v2_generate.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
import zarr
import numpy as np
from numcodecs import Blosc, GZip, BZ2, ZFPY
from numcodecs import Blosc, GZip, BZ2, ZFPY, PCodec

compressor_blosc = Blosc(cname="zstd", clevel=1, shuffle=Blosc.BITSHUFFLE)
compressor_gzip = GZip(level=9)
compressor_bz2 = BZ2(level=9)
compressor_zfpy = ZFPY(mode = 4, tolerance=0.01) # fixed accuracy
compressor_pcodec = PCodec(level = 8, mode_spec="auto")

data = np.array(
[
Expand All @@ -28,12 +29,16 @@
("gzip", compressor_gzip),
("bz2", compressor_bz2),
("zfpy", compressor_zfpy),
("pcodec", compressor_pcodec),
]:
if order == "F" and compressor_name != "blosc":
continue

store = zarr.DirectoryStore(f"tests/data/v2/array_{compressor_name}_{order}.zarr")
store.clear()
try:
store.clear()
except FileNotFoundError:
pass
array = zarr.creation.create(
shape=[10, 10],
chunks=[5, 5],
Expand Down
24 changes: 24 additions & 0 deletions tests/data/v3/array_pcodec.zarr/.zarray
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{
"node_type": "array",
"zarr_format": 2,
"shape": [
10,
10
],
"chunks": [
5,
5
],
"dtype": "<f4",
"compressor": {
"id": "pcodec",
"delta_encoding_order": null,
"equal_pages_up_to": 262144,
"level": 8,
"mode_spec": "auto"
},
"fill_value": 0.0,
"order": "C",
"filters": null,
"dimension_separator": "."
}
3 changes: 3 additions & 0 deletions tests/data/v3/array_pcodec.zarr/.zattrs
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"key": "value"
}
Binary file added tests/data/v3/array_pcodec.zarr/0.0
Binary file not shown.
Binary file added tests/data/v3/array_pcodec.zarr/0.1
Binary file not shown.
Binary file added tests/data/v3/array_pcodec.zarr/1.0
Binary file not shown.
Binary file added tests/data/v3/array_pcodec.zarr/1.1
Binary file not shown.
39 changes: 39 additions & 0 deletions tests/data/v3/array_pcodec.zarr/zarr.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
{
"zarr_format": 3,
"node_type": "array",
"shape": [
10,
10
],
"data_type": "float32",
"chunk_grid": {
"name": "regular",
"configuration": {
"chunk_shape": [
5,
5
]
}
},
"chunk_key_encoding": {
"name": "v2",
"configuration": {
"separator": "."
}
},
"fill_value": 0.0,
"codecs": [
{
"name": "pcodec",
"configuration": {
"delta_encoding_order": null,
"equal_pages_up_to": 262144,
"level": 8,
"mode_spec": "auto"
}
}
],
"attributes": {
"key": "value"
}
}