diff --git a/doc/information.rst b/doc/information.rst index 2894e8ac..3edace03 100644 --- a/doc/information.rst +++ b/doc/information.rst @@ -77,7 +77,7 @@ HDF5 compression filters and compression libraries sources were obtained from: * `SZ plugin `_ (commit `f466775 `_) using `SZ `_, ZLib and ZStd. -* `H5Z-SPERR plugin `_ (v0.1.3) using `SPERR `_ (v0.8.1). +* `H5Z-SPERR plugin `_ (v0.1.3) using `SPERR `_ (v0.8.2). * `SZ3 plugin `_ (commit `4bbe9df7e4bcb `_) using `SZ3 `_ and ZStd. diff --git a/src/SPERR/.github/workflows/clang-format.yml b/src/SPERR/.github/workflows/clang-format.yml index 7b8b2393..8c19b1ea 100644 --- a/src/SPERR/.github/workflows/clang-format.yml +++ b/src/SPERR/.github/workflows/clang-format.yml @@ -22,9 +22,9 @@ jobs: steps: - uses: actions/checkout@v2 - name: Run clang-format style check for C/C++ programs. - uses: jidicula/clang-format-action@v4.8.0 + uses: jidicula/clang-format-action@v4.13.0 with: - clang-format-version: '13' + clang-format-version: '18' check-path: ${{ matrix.path['check'] }} exclude-regex: ${{ matrix.path['exclude'] }} fallback-style: 'Chromium' # optional diff --git a/src/SPERR/.github/workflows/clang-tidy.yml b/src/SPERR/.github/workflows/clang-tidy.yml.bkp similarity index 96% rename from src/SPERR/.github/workflows/clang-tidy.yml rename to src/SPERR/.github/workflows/clang-tidy.yml.bkp index e4423b3f..859d8a73 100644 --- a/src/SPERR/.github/workflows/clang-tidy.yml +++ b/src/SPERR/.github/workflows/clang-tidy.yml.bkp @@ -22,4 +22,4 @@ jobs: builddir: 'build' excludedirs: '' extensions: 'c,h,C,H,cpp,hpp,cc,hh,c++,h++,cxx,hxx' - cmakeoptions: '-DBUILD_CLI_UTILITIES=OFF -DBUILD_UNIT_TESTS=OFF -DUSE_ZSTD=OFF' + cmakeoptions: '-DBUILD_CLI_UTILITIES=OFF -DBUILD_UNIT_TESTS=OFF' diff --git a/src/SPERR/CMakeLists.txt b/src/SPERR/CMakeLists.txt index a48d03af..fb91e92d 100644 --- a/src/SPERR/CMakeLists.txt +++ b/src/SPERR/CMakeLists.txt @@ -2,14 +2,13 @@ cmake_minimum_required(VERSION 3.14) -project(SPERR VERSION 0.8.1 
LANGUAGES CXX DESCRIPTION "Lossy Scientific Compression with SPERR") +project(SPERR VERSION 0.8.2 LANGUAGES CXX DESCRIPTION "Lossy Scientific Compression with SPERR") + +if(NOT CMAKE_CXX_STANDARD) + set(CMAKE_CXX_STANDARD "20" CACHE STRING "Choose the C++ Standard to use." FORCE) + set_property(CACHE CMAKE_CXX_STANDARD PROPERTY STRINGS "20" "17") +endif() -# -# specify the C++ standard -# CMake will try to add a -std=c++20 flag if the compiler supports, but if it doesn't, -# CMake will `decay` to a previous supported flag, e.g. -std=c++11. -# -set(CMAKE_CXX_STANDARD 20) if(NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the type of build." FORCE) set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "RelWithDebInfo") @@ -32,7 +31,7 @@ endif() option( BUILD_SHARED_LIBS "Build shared SPERR library" ON ) option( BUILD_UNIT_TESTS "Build unit tests using GoogleTest" ON ) option( BUILD_CLI_UTILITIES "Build a set of command line utilities" ON ) -option( USE_OMP "Use OpenMP parallelization on 3D volumes" ON ) +option( USE_OMP "Use OpenMP parallelization on 3D volumes" OFF ) option( SPERR_PREFER_RPATH "Set RPATH; this can fight with package managers so turn off when building for them" ON ) mark_as_advanced(FORCE SPERR_PREFER_RPATH) @@ -109,8 +108,7 @@ if( BUILD_CLI_UTILITIES ) set( CLI11_SINGLE_FILE OFF CACHE INTERNAL "Don't use single file CLI11") FetchContent_Declare( cli11 GIT_REPOSITORY https://github.com/CLIUtils/CLI11 - GIT_TAG 291c58789c031208f08f4f261a858b5b7083e8e2 # v2.3.2 - PATCH_COMMAND patch -N CMakeLists.txt < ${CMAKE_SOURCE_DIR}/cli11.patch || true + GIT_TAG 6c7b07a878ad834957b98d0f9ce1dbe0cb204fc9 # v2.4.2 ) FetchContent_MakeAvailable(cli11) diff --git a/src/SPERR/README.md b/src/SPERR/README.md index 90ef7365..8564cd22 100644 --- a/src/SPERR/README.md +++ b/src/SPERR/README.md @@ -1,9 +1,6 @@ 
[![clang-format](https://github.com/NCAR/SPERR/actions/workflows/clang-format.yml/badge.svg)](https://github.com/NCAR/SPERR/actions/workflows/clang-format.yml) -[![clang-tidy](https://github.com/NCAR/SPERR/actions/workflows/clang-tidy.yml/badge.svg)](https://github.com/NCAR/SPERR/actions/workflows/clang-tidy.yml) [![unit-test](https://github.com/NCAR/SPERR/actions/workflows/unit-test.yml/badge.svg)](https://github.com/NCAR/SPERR/actions/workflows/unit-test.yml) [![CodeQL](https://github.com/NCAR/SPERR/actions/workflows/codeql-analysis.yml/badge.svg)](https://github.com/NCAR/SPERR/actions/workflows/codeql-analysis.yml) - - [![DOI](https://zenodo.org/badge/225491235.svg)](https://zenodo.org/badge/latestdoi/225491235) @@ -20,6 +17,31 @@ This combination gives SPERR flexibility to compress targetting different qualit 2) peak signal-to-noise ratio (PSNR), and 3) point-wise error (PWE). The name of SPERR stands for **SP**eck with **ERR**or bounding. +## Quick Build +SPERR requires 1) a working C++ compiler and 2) CMake tools to build. On a Unix-like system, +the build steps are the following: + +```bash +git clone https://github.com/NCAR/SPERR.git # clone the repo +mkdir SPERR/build # create the build directory +cd SPERR/build # enter the build directory +cmake .. # use cmake to configure the project +cmake -DUSE_OMP=ON .. # Optional: enable OpenMP on 3D volumes. +cmake -DCMAKE_INSTALL_PREFIX=/my/install/dir .. # Optional: specify a directory to install SPERR. The default is /usr/local . +cmake -DCMAKE_CXX_STANDARD=17 .. # Optional: use C++17 rather than C++20. The code is slightly faster with C++20. +make -j 8 # build the project +ctest . # run unit tests, which should have 100% tests passed +make install # install the library and CLI tools to a specified directory. +``` + +## Plugin for HDF5 +SPERR is available as a *dynamically loaded plugin* for HDF5 with a registered ID of `32028`. +This plugin is available at this [repo](https://github.com/NCAR/H5Z-SPERR). 
+ +## Wrapper for Fortran +A Fortran wrapper for SPERR has also been created by [ofmla](https://github.com/ofmla) +at this [repo](https://github.com/ofmla/fortran-sperr). + ## Documentation SPERR documentation is hosted on Github [Wiki](https://github.com/NCAR/SPERR/wiki) pages. To get started, one might want to @@ -45,5 +67,6 @@ If SPERR benefits your work, please kindly cite [this publication](https://ieeex (Author's copy is available [here](https://vast.ucar.edu/pdfs/SPERR_IPDPS.pdf).) ## Presentations +- FZ Workshop Hands-on: Feb 15 2024, Sarasota, FL. ([handout and examples](https://vast.ucar.edu/pdfs/Li_FZ2024.pdf)) - SC'23 Tutorial on lossy scientific data compression: Nov 13 2023, Denver CO. ([slides](https://vast.ucar.edu/pdfs/Li_SC23_Slides.pdf)) - IPDPS'23 Lossy Scientific Data Compression With SPERR: May 18 2023, St. Petersburg, FL. ([slides](https://vast.ucar.edu/pdfs/Li_IPDPS23_Slides.pdf)) diff --git a/src/SPERR/cli11.patch b/src/SPERR/cli11.patch deleted file mode 100644 index 3edf079b..00000000 --- a/src/SPERR/cli11.patch +++ /dev/null @@ -1,4 +0,0 @@ -1c1 -< cmake_minimum_required(VERSION 3.4) ---- -> cmake_minimum_required(VERSION 3.14) diff --git a/src/SPERR/handout/compile.txt b/src/SPERR/handout/compile.txt new file mode 100644 index 00000000..32c9199b --- /dev/null +++ b/src/SPERR/handout/compile.txt @@ -0,0 +1 @@ +xelatex -shell-escape main.tex diff --git a/src/SPERR/handout/main.tex b/src/SPERR/handout/main.tex new file mode 100644 index 00000000..d5a7c4ba --- /dev/null +++ b/src/SPERR/handout/main.tex @@ -0,0 +1,568 @@ +\documentclass{article} +\usepackage{graphicx} % Required for inserting images +\usepackage[colorlinks]{hyperref} +\usepackage[margin=1in]{geometry} +\usepackage[many]{tcolorbox} % for COLORED BOXES (tikz and xcolor included) +\usepackage{minted} +\usepackage{xspace} +\setcounter{tocdepth}{1} +\setminted{linenos} +\setlength\parindent{0pt} % killing indentation for all the text +\setlength\columnsep{0.25in} % setting length 
of column separator +\pagestyle{empty} % setting pagestyle to be empty +\definecolor{main}{HTML}{5989cf} % setting main color to be used +\definecolor{sub}{HTML}{cde4ff} % setting sub color to be used +\tcbset{ + sharp corners, + colback = white, + before skip = 0.2cm, % add extra space before the box + after skip = 0.5cm % add extra space after the box +} % setting global options for tcolorbox +\newtcolorbox{BlueBox}{ + sharpish corners, % better drop shadow + colback = sub, + colframe = main, + boxrule = 0pt, + toprule = 4.5pt, % top rule weight + enhanced, + fuzzy shadow = {0pt}{-2pt}{-0.5pt}{0.5pt}{black!35} % {xshift}{yshift}{offset}{step}{options} +} +\newcommand{\callout}[1]{\begin{BlueBox}#1\end{BlueBox}} + + + +\title{SPERR Interface and Examples} +\author{Samuel Li} +\date{\today} + +\begin{document} + +\vspace{-1cm} +\maketitle + +\vspace{-1cm} + +\section{Introduction} +This document is the SPERR section of the handout for the +\href{https://szcompressor.org/next.szcompressor.github.io/meetings/feb24fl/}{FZ workshop} +hands-on session on February 15 2024, Sarasota, FL. +It provides examples for the CLI interface, C++ interface, and C interface of SPERR +(Section~\ref{sec:cli}, \ref{sec:cpp}, and \ref{sec:c}, respectively). + +\section{SPERR} +Contact: \href{mailto:shaomeng@ucar.edu}{Samuel Li (shaomeng@ucar.edu)} \\ +Repo: \href{https://github.com/NCAR/SPERR/}{github.com/NCAR/SPERR/} \\ +Wiki: \href{https://github.com/NCAR/SPERR/wiki}{github.com/NCAR/SPERR/wiki} + +\vspace{2mm} +SPERR uses \textit{wavelet transforms} to decorrelate the data, encodes the quantized +coefficients, and explicitly corrects any data point exceeding a prescribed point-wise error +(PWE) tolerance. +Most often, SPERR produces the smallest compressed bitstream +honoring a PWE tolerance. + +A SPERR bitstream can be used to reconstruct the data fields +in two additional fashions: \textit{flexible-rate} decoding and +\textit{multi-resolution} decoding. 
+ +\vspace{-2mm} +\begin{itemize} +\item \textit{Flexible-rate} decoding: any prefix of a SPERR bitstream +(i.e., a sub-bitstream that starts from the very beginning) +produced by a simple truncation is still valid to reconstruct the data +field, though at a lower quality. +This ability is useful for applications such as tiered storage and data sharing +over slow connections, to name a few. +\vspace{-2mm} +\item \textit{Multi-resolution} decoding: a hierarchy of the data field +with coarsened resolutions can be obtained together with the +native resolution. +This ability is useful for quick analyses with limited resources. +\end{itemize} + +\callout{On a Unix-like system with a working C++ compiler and CMake, SPERR can be +built from source and made available to users in just a few commands. +See this \href{https://github.com/NCAR/SPERR?tab=readme-ov-file\#quick-build}{README} +for detail.} + + +\subsection{CLI Interface} +\label{sec:cli} +Upon a successful build, four CLI utility programs are placed in the \texttt{./bin/} +directory; three of them are most relevant for end users: \texttt{sperr2d}, +\texttt{sperr3d}, and \texttt{sperr3d\_trunc}; +each of them can be invoked with the \texttt{-h} option to display a help message. + +\subsubsection{\texttt{sperr2d}} +\texttt{sperr2d} is responsible for compressing and decompressing a 2D data slice. +In compression mode (\texttt{-c}), it takes as input a raw binary file consisting of +single- or double-precision floating point values, and outputs a compressed bitstream. +In decompression mode (\texttt{-d}), it takes as input a compressed bitstream, and +outputs the decompressed binary file of single- or double-precision floating point values. +Its help message contains all the options \texttt{sperr2d} takes: + +\begin{minted}{bash} +$ ./bin/sperr2d -h + +Usage: ./bin/sperr2d [OPTIONS] [filename] + +Positionals: + filename TEXT:FILE A data slice to be compressed, or + a bitstream to be decompressed. 
+ +Options: + -h,--help Print this help message and exit + +Execution settings: + -c Excludes: -d Perform a compression task. + -d Excludes: -c Perform a decompression task. + +Input properties: + --ftype UINT Specify the input float type in bits. Must be 32 or 64. + --dims [UINT,UINT] Dimensions of the input slice. E.g., `--dims 128 128` + (The fastest-varying dimension appears first.) + +Output settings: + --bitstream TEXT Output compressed bitstream. + --decomp_f TEXT Output decompressed slice in f32 precision. + --decomp_d TEXT Output decompressed slice in f64 precision. + --decomp_lowres_f TEXT Output lower resolutions of the decompressed slice in f32 precision. + --decomp_lowres_d TEXT Output lower resolutions of the decompressed slice in f64 precision. + --print_stats Needs: -c Show statistics measuring the compression quality. + +Compression settings (choose one): + --pwe FLOAT Excludes: --psnr --bpp + Maximum point-wise error (PWE) tolerance. + --psnr FLOAT Excludes: --pwe --bpp + Target PSNR to achieve. + --bpp FLOAT:FLOAT in [0 - 64] Excludes: --pwe --psnr + Target bit-per-pixel (bpp) to achieve. 
+\end{minted} + +Examples: +\begin{enumerate} +\item Compress a 2D slice in $512\times512$ dimension, single-precision floats + with a PWE tolerance of $10^{-2}$: \\ + \texttt{./bin/sperr2d -c --ftype 32 --dims 512 512 --pwe 1e-2 + \textbackslash \\ + --bitstream ./out.stream ./in.f32} + +\item Perform the compression task described above, plus also write out the + compress-decompressed slice, and finally print statistics + measuring the compression quality: \\ + \texttt{./bin/sperr2d -c --ftype 32 --dims 512 512 --pwe 1e-2 + \textbackslash \\ + --decomp\_f ./out.decomp --print\_stats + --bitstream ./out.stream ./in.f32} + +\item Decompress from a SPERR bitstream, and write out the slice in native + and coarsened resolutions: \\ + \texttt{./bin/sperr2d -d --decomp\_f ./out.decomp --decomp\_lowres\_f ./out.lowres + ./sperr.stream} + In this example, the output file \texttt{out.decomp} will be of the native resolution, + and six other files (\texttt{out.lowres.256x256, out.lowres.128x128, etc.}) + will also be produced with their filenames indicating the coarsened resolution. +\end{enumerate} + +\subsubsection{\texttt{sperr3d}} +\label{sec:sperr3d} +\texttt{sperr3d} is responsible for compressing and decompressing a 3D data volume. +Similar to \texttt{sperr2d}, it operates in either compression (\texttt{-c}) or +decompression (\texttt{-d}) mode, converting between raw binary floating-point values +and compressed bitstreams. +Different from \texttt{sperr2d} which compresses the input 2D slice as a whole, +\texttt{sperr3d} divides an input 3D volume into smaller chunks, and then compresses +each chunk individually. +This chunking step allows for compressing and decompressing all the small chunks +individually and in parallel. +\texttt{sperr3d} uses $256^3$ as the default chunk size, but any number from dozens to +low hundreds would work well. +(Chunk dimensions that can divide the full volume are preferred, but not mandatory.) 
+Command line options \texttt{--chunks} and \texttt{--omp} control the chunking and +parallel execution behavior, respectively. +The help message of \texttt{sperr3d} details all the options it takes: + +\begin{minted}{bash} +$ ./bin/sperr3d -h + +Usage: ./bin/sperr3d [OPTIONS] [filename] + +Positionals: + filename TEXT:FILE A data volume to be compressed, or + a bitstream to be decompressed. + +Options: + -h,--help Print this help message and exit + +Execution settings: + -c Excludes: -d Perform a compression task. + -d Excludes: -c Perform a decompression task. + --omp UINT Number of OpenMP threads to use. Default (or 0) to use all. + +Input properties (for compression): + --ftype UINT Specify the input float type in bits. Must be 32 or 64. + --dims [UINT,UINT,UINT] Dimensions of the input volume. E.g., `--dims 128 128 128` + (The fastest-varying dimension appears first.) + +Output settings: + --bitstream TEXT Output compressed bitstream. + --decomp_f TEXT Output decompressed volume in f32 precision. + --decomp_d TEXT Output decompressed volume in f64 precision. + --decomp_lowres_f TEXT Output lower resolutions of the decompressed volume in f32 precision. + --decomp_lowres_d TEXT Output lower resolutions of the decompressed volume in f64 precision. + --print_stats Needs: -c Print statistics measuring the compression quality. + +Compression settings: + --chunks [UINT,UINT,UINT] Dimensions of the preferred chunk size. Default: 256 256 256 + (Volume dims don't need to be divisible by these chunk dims.) + --pwe FLOAT Excludes: --psnr --bpp + Maximum point-wise error (PWE) tolerance. + --psnr FLOAT Excludes: --pwe --bpp + Target PSNR to achieve. + --bpp FLOAT:FLOAT in [0 - 64] Excludes: --pwe --psnr + Target bit-per-pixel (bpp) to achieve. 
+\end{minted} + +Examples: +\begin{enumerate} +\item Compress a 3D volume in $384\times384\times256$ dimension, double-precision floats, + using a PWE tolerance of $10^{-9}$ and chunks of $192\times192\times256$: \\ + \texttt{./bin/sperr3d -c --omp 4 --ftype 64 --dims 384 384 256 --chunks 192 192 256 + \textbackslash \\ + --pwe 1e-9 --bitstream ./out.stream ./in.f64} +\item Perform the compression task described above, plus also write out the + compress-decompressed volume, and finally print statistics + measuring the compression quality: \\ + \texttt{./bin/sperr3d -c --omp 4 --ftype 64 --dims 384 384 256 --chunks 192 192 256 + \textbackslash \\ + --pwe 1e-9 --decomp\_d ./out.decomp --print\_stats --bitstream ./out.stream ./in.f64} +\item Decompress from a SPERR bitstream, and write out the volume in native + and coarsened resolutions: \\ + \texttt{./bin/sperr3d -d --decomp\_d ./out.decomp --decomp\_lowres\_d ./out.lowres + ./sperr.stream} + In this example, the output file \texttt{out.decomp} will be of the native resolution, + and five other files (\texttt{out.lowres.192x192x128, out.lowres.96x96x64, etc.}) + will also be produced with their filenames indicating the coarsened resolution. +\end{enumerate} + +\callout{To support multi-resolution decoding in 3D cases, the individual chunks +(\texttt{--chunks}) need to 1) approximate a cube, so that there are the same number +of wavelet transforms performed on each dimension, and 2) divide the full volume +in each dimension.} + +\subsubsection{\texttt{sperr3d\_trunc} and Flexible-Rate Decoding} +Compressed SPERR bitstreams support \textit{flexible-rate} decoding, meaning that a +sub-bitstream taken from the beginning can still be used to reconstruct the data field. +Equally important, the reconstruction will have the best possible quality (in terms of +average error) under the size constraint of the sub-bitstream, though lower quality +than using the full bitstream. 
+Given a compressed bitstream, a sub-bitstream can be obtained by a simple truncation, +for example, a truncation that keeps the first 10\% of the full bitstream. +%On Unix systems, utility tool \texttt{head} can easily perform this task. + +The chunking scheme used in 3D compression (see Section~\ref{sec:sperr3d}) +brings some complication, +because the bitstream representing each chunk needs to be truncated \textit{individually}. +\texttt{sperr3d\_trunc} is thus introduced to properly truncate a multi-chunked SPERR bitstream. +Specifically, it 1) locates the bitstream representing each chunk, +2) truncates the bitstream, and 3) stitches all truncated bitstreams together so +\texttt{sperr3d} can properly decompress it. + +The help message of \texttt{sperr3d\_trunc} details its options: + +\callout{SPERR bitstreams without using multi-chunks (i.e., \texttt{--dims} equals +\texttt{--chunks} in 3D, and all 2D cases) can safely be truncated by any method. +For example, the following command truncates a compressed bitstream \texttt{field.stream} +to keep its first 5kB as \texttt{field2.stream}, which is also recognized by SPERR:\\ +\texttt{head -c 5000 field.stream > field2.stream} +} + +\begin{minted}{bash} +$ ./bin/sperr3d_trunc -h + +Usage: ./bin/sperr3d_trunc [OPTIONS] [filename] + +Positionals: + filename TEXT:FILE The original SPERR3D bitstream to be truncated. + +Options: + -h,--help Print this help message and exit + +Truncation settings: + --pct UINT REQUIRED Percentage (1--100) of the original bitstream to truncate. + --omp UINT Number of OpenMP threads to use. Default (or 0) to use all. + +Output settings: + -o TEXT Write out the truncated bitstream. + +Input settings: + --orig32 TEXT Original raw data in 32-bit precision to calculate compression + quality using the truncated bitstream. + --orig64 TEXT Original raw data in 64-bit precision to calculate compression + quality using the truncated bitstream. 
+\end{minted} + +Examples: +\begin{enumerate} +\item Produce a truncated version of a bitstream using 10\% of the original length: \\ + \texttt{./bin/sperr3d\_trunc --pct 10 -o ./stream.10 ./bitstream} +\item Perform the task above, plus evaluate compression quality using the truncated + bitstream:\\ + \texttt{./bin/sperr3d\_trunc --pct 10 -o ./stream.10 --orig64 ./data.f64 ./bitstream} +\end{enumerate} + +\subsection{C++ Interface} +\label{sec:cpp} +\subsubsection{2D Compression and Decompression} +C++ class \texttt{sperr::SPECK2D\_FLT} is responsible for 2D compression and decompression. +The sample code walks through necessary steps to perform a compression and decompression +task, and a more concrete example can be found +\href{https://github.com/NCAR/SPERR/blob/main/utilities/sperr2d.cpp}{here}. + +\begin{minted}{cpp} +// +// Example of using a sperr::SPECK2D_FLT() to compress a 2D slice. +// This is a 6-step process. +// +#include "SPECK2D_FLT.h" + +// Step 1: create an encoder: +auto encoder = sperr::SPECK2D_FLT(); + +// Step 2: specify the 2D slice dimension (the third dimension is left with 1): +encoder.set_dims({128, 128, 1}); + +// Step 3: copy over the input data from a raw pointer (float* or double*): +encoder.copy_data(ptr, 16'384); // 16,384 is the number of values. +// Step 3 alternative: one can hand a memory buffer to the encoder to avoid a memory copy; +// either version works. +encoder.take_data(std::move(input)); // input is of type std::vector. + +// Step 4: specify the compression quality measured in one of three metrics; +// only the last invoked quality metric is honored. 
+encoder.set_tolerance(1e-9); // PWE tolerance = 1e-9 +encoder.set_bitrate(2.2); // Target bitrate = 2.2 +encoder.set_psnr(102.2); // Target PSNR = 102.2 + +// Step 5: perform the compression task: +encoder.compress(); + +// Step 6: retrieve the compressed bitstream: +auto bitstream = std::vector(); +encoder.append_encoded_bitstream(bitstream); +\end{minted} + +\begin{minted}{cpp} +// +// Example of using a sperr::SPECK2D_FLT() to decompress a bitstream. +// This is a 5-step process. +// +#include "SPECK2D_FLT.h" + +// Step 1: create a decoder: +auto decoder = sperr::SPECK2D_FLT(); + +// Step 2: specify the 2D slice dimension (the third dimension is left with 1): +// This information is often saved once somewhere for many same-sized slices. +decoder.set_dims({128, 128, 1}); + +// Step 3: pass in the compressed bitstream as a raw pointer (uint8_t*): +decoder.use_bitstream(ptr, 16'384); // 16,384 is the length of the bitstream. + +// Step 4: perform the decompression task: +decoder.decompress(multi_res); // a boolean, if to enable multi-resolution decoding + +// Step 5: retrieve the decompressed volume: +std::vector vol = decoder.view_decoded_data(); +auto hierarchy = decoder.view_hierarchy(); // if multi-resolution was enabled +// Step 5 alternative: one can take ownership of the data buffer to avoid a memory copy. +std::vector vol = decoder.release_decoded_data(); +auto hierarchy = decoder.release_hierarchy(); // if multi-resolution was enabled +\end{minted} + +\subsubsection{3D Compression and Decompression} +C++ class \texttt{sperr::SPERR3D\_OMP\_C} is responsible for 3D compression, and +\texttt{sperr::SPERR3D\_OMP\_D} is responsible for 3D decompression. +The sample code walks through necessary steps to perform a compression and decompression +task, and a more concrete example can be found +\href{https://github.com/NCAR/SPERR/blob/main/utilities/sperr3d.cpp}{here}. + +\begin{minted}{cpp} +// +// Example of using a sperr::SPERR3D_OMP_C() to compress a 3D volume. 
+// This is a 6-step process. +// +#include "SPERR3D_OMP_C.h" + +// Step 1: create an encoder: +auto encoder = sperr::SPERR3D_OMP_C(); + +// Step 2: specify the volume and chunk dimensions, respectively: +encoder.set_dims_and_chunks({384, 384, 256}, {192, 192, 128}); + +// Step 3: specify the number of OpenMP threads to use: +encoder.set_num_threads(4); + +// Step 4: specify the compression quality measured in one of three metrics; +// only the last invoked quality metric is honored. +encoder.set_tolerance(1e-9); // PWE tolerance = 1e-9 +encoder.set_bitrate(2.2); // Target bitrate = 2.2 +encoder.set_psnr(102.2); // Target PSNR = 102.2 + +// Step 5: perform the compression task: +// The input data is passed in in the form of a raw pointer (float* or double*), +// and the total number of values will be passed in here too. +encoder.compress(ptr, 384 * 384 * 256); + +// Step 6: retrieve the compressed bitstream: +std::vector stream = encoder.get_encoded_bitstream(); +\end{minted} + +\begin{minted}{cpp} +// +// Example of using a sperr::SPERR3D_OMP_D() to decompress a bitstream. +// This is a 5-step process. +// +#include "SPERR3D_OMP_D.h" + +// Step 1: create a decoder: +auto decoder = sperr::SPERR3D_OMP_D(); + +// Step 2: specify the number of OpenMP threads to use: +decoder.set_num_threads(4); + +// Step 3: pass in the compressed bitstream as a raw pointer (uint8_t*): +decoder.use_bitstream(ptr, 16'384); // 16,384 is the length of the bitstream. + +// Step 4: perform the decompression task: +// Note that the pointer to the bitstream is passed in again! 
+decoder.decompress(ptr, multi_res); // a boolean, if to enable multi-resolution decoding + +// Step 5: retrieve the decompressed volume: +auto [dimx, dimy, dimz] = decoder.get_dims(); // dimension of the volume +std::vector vol = decoder.view_decoded_data(); +auto hierarchy = decoder.view_hierarchy(); // if multi-resolution was enabled +// Step 5 alternative: one can take ownership of the data buffer to avoid memory copies. +std::vector vol = decoder.release_decoded_data(); +auto hierarchy = decoder.release_hierarchy(); // if multi-resolution was enabled +\end{minted} + +\callout{To achieve higher performance with repeated compression and + decompression tasks, the encoder and decoder objects are better to be + re-used rather than repeatedly destroyed and created.} + +\subsection{C Interface} +\label{sec:c} +SPERR provides a C wrapper with a set of C functions. +All of the C interface is in the header file +\href{https://github.com/NCAR/SPERR/blob/main/include/SPERR_C_API.h}{\texttt{SPERR\_C\_API.h}}, +which itself documents the C functions and parameters, etc. +The following example code walks through key steps to use the C API to perform +compression and decompression, while more concrete examples are available +for \href{https://github.com/NCAR/SPERR/blob/main/examples/C_API/2d.c}{2D} +and \href{https://github.com/NCAR/SPERR/blob/main/examples/C_API/3d.c}{3D} cases. + +\subsubsection{Example: 2D} + +\begin{minted}{C} +/* + * Example of using the SPERR C API to perform 2D compression and decompression tasks. + */ +#include "SPERR_C_API.h" + +/* Step 1: create variables to keep the output: */ +void* stream = NULL; /* caller is responsible for free'ing it after use. */ +size_t stream_len = 0; + +/* Step 2: call the 2D compression function: + * Assume that we have a buffer of 128 * 128 floats (in float* type) to be compressed, + * using PWE tolerance = 1e-3. 
+ */ +int ret = sperr_comp_2d(ptr, /* memory buffer containing the input */ + 1, /* the input is of type float; 0 means double. */ + 128, /* dimx */ + 128, /* dimy */ + 3, /* compression mode; 3 means fixed PWE */ + 1e-3, /* actual PWE tolerance */ + 0, /* not using a header for the output bitstream */ + &stream, /* will hold the compressed bitstream */ + &stream_len); /* length of the compressed bitstream */ +assert(ret == 0); + +/* + * Now that the 2D compression is completed, one can decompress the bitstream to + * retrieve the raw values, as the rest of this example shows. + */ + +/* Step 3: create a pointer to hold the decompressed values: */ +void* output = NULL; /* caller is responsible for free'ing it after use. */ + +/* Step 4: call the 2D decompression function: */ +int ret2 = sperr_decomp_2d(stream, /* compressed bitstream */ + stream_len, /* compressed bitstream length */ + 1, /* decompress to floats. 0 means to doubles. */ + 128, /* dimx */ + 128, /* dimy */ + &output); /* decompressed data is stored here */ +assert(ret2 == 0); +free(output); /* cleanup */ +free(stream); /* cleanup */ +\end{minted} + +\subsubsection{Example: 3D} + +\begin{minted}{C} +/* + * Example of using the SPERR C API to perform 3D compression and decompression tasks. + */ +#include "SPERR_C_API.h" + +/* Step 1: create variables to keep the output: */ +void* stream = NULL; /* caller is responsible for free'ing it after use. */ +size_t stream_len = 0; + +/* Step 2: call the 3D compression function: + * Assume that we have a buffer of 256^3 floats (in float* type) to be compressed, + * using PWE tolerance = 1e-3 and chunk dimension of 128^3. + */ +int ret = sperr_comp_3d(ptr, /* memory buffer containing the input */ + 1, /* the input is of type float; 0 means double. 
*/ + 256, /* dimx */ + 256, /* dimy */ + 256, /* dimz */ + 128, /* chunk_x */ + 128, /* chunk_y */ + 128, /* chunk_z */ + 3, /* compression mode; 3 means fixed PWE */ + 1e-3, /* actual PWE tolerance */ + 4, /* use 4 OpenMP threads */ + &stream, /* will hold the compressed bitstream */ + &stream_len); /* length of the compressed bitstream */ +assert(ret == 0); + +/* + * Now that the 3D compression is completed, one can decompress the bitstream to + * retrieve the raw values, as the rest of this example shows. + */ + +/* Step 3: create a pointer to hold the decompressed values, + * and also variables to hold the volume dimensions. + */ +void* output = NULL; /* caller is responsible for free'ing it after use. */ +size_t dimx = 0, dimy = 0, dimz = 0; + +/* Step 4: call the 3D decompression function: */ +int ret2 = sperr_decomp_3d(stream, /* compressed bitstream */ + stream_len, /* compressed bitstream length */ + 1, /* decompress to floats. 0 means to doubles. */ + 4, /* use 4 OpenMP threads */ + &dimx, /* dimx of the decompressed volume */ + &dimy, /* dimy of the decompressed volume */ + &dimz, /* dimz of the decompressed volume */ + &output); /* decompressed data is stored here */ +assert(ret2 == 0); +free(output); /* cleanup */ +free(stream); /* cleanup */ +\end{minted} + +\end{document} diff --git a/src/SPERR/include/Bitmask.h b/src/SPERR/include/Bitmask.h index 3805d083..052f8a67 100644 --- a/src/SPERR/include/Bitmask.h +++ b/src/SPERR/include/Bitmask.h @@ -37,16 +37,24 @@ class Bitmask { // auto rlong(size_t idx) const -> uint64_t; // `idx` of the bit, not the long. auto rbit(size_t idx) const -> bool; + + // Functions to perform bulk tests. + // + // Two versions of the `has_true()` function. Both versions return -1 in case of no true found. + // 1) Position == false: it returns 1 indicating finding a true. + // 2) Position == true: it returns the offset relative to `start` of the first true. 
+ template + auto has_true(size_t start, size_t len) const -> int64_t; auto count_true() const -> size_t; // How many 1's in this mask? // Functions for write // - void wlong(size_t idx, uint64_t value); // `idx` of the bit, not the long. void wbit(size_t idx, bool bit); - void wtrue(size_t idx); // This is faster than `wbit(idx, true)`. - void wfalse(size_t idx); // This is faster than `wbit(idx, false)`. - void reset(); // Set the current bitmask to be all 0's. - void reset_true(); // Set the current bitmask to be all 1's. + void wlong(size_t idx, uint64_t value); // `idx` of the bit, not the long. + void wtrue(size_t idx); // This is faster than `wbit(idx, true)`. + void wfalse(size_t idx); // This is faster than `wbit(idx, false)`. + void reset(); // Set the current bitmask to be all 0's. + void reset_true(); // Set the current bitmask to be all 1's. // Functions for direct access of the underlying data buffer // Note: `use_bitstream()` reads the number of values (uint64_t type) that provide @@ -55,7 +63,7 @@ class Bitmask { auto view_buffer() const -> const std::vector&; void use_bitstream(const void* p); -#if defined __cpp_lib_three_way_comparison && defined __cpp_impl_three_way_comparison +#if __cplusplus >= 202002L && defined __cpp_lib_three_way_comparison auto operator<=>(const Bitmask& rhs) const noexcept; auto operator==(const Bitmask& rhs) const noexcept -> bool; #endif diff --git a/src/SPERR/include/SPECK3D_INT.h b/src/SPERR/include/SPECK3D_INT.h index 3859d960..881f98d5 100644 --- a/src/SPERR/include/SPECK3D_INT.h +++ b/src/SPERR/include/SPECK3D_INT.h @@ -62,7 +62,7 @@ class SPECK3D_INT : public SPECK_INT { virtual void m_process_S(size_t idx1, size_t idx2, size_t& counter, bool) = 0; virtual void m_process_P(size_t i, size_t m, size_t& c, bool) = 0; // Called by `m_code_S()`. virtual void m_process_P_lite(size_t idx) = 0; // Called by `m_sorting_pass()` directly. 
- virtual void m_additional_initialization() = 0; + virtual void m_additional_initialization() {}; // empty by default void m_code_S(size_t idx1, size_t idx2); auto m_partition_S_XYZ(Set3D, uint16_t) const -> std::tuple, uint16_t>; diff --git a/src/SPERR/include/SPECK3D_INT_DEC.h b/src/SPERR/include/SPECK3D_INT_DEC.h index 6c7f1973..3ccb6799 100644 --- a/src/SPERR/include/SPECK3D_INT_DEC.h +++ b/src/SPERR/include/SPECK3D_INT_DEC.h @@ -24,7 +24,6 @@ class SPECK3D_INT_DEC final : public SPECK3D_INT { void m_process_S(size_t idx1, size_t idx2, size_t& counter, bool read) final; void m_process_P(size_t idx, size_t no_use, size_t& counter, bool read) final; void m_process_P_lite(size_t idx) final; - void m_additional_initialization() final{}; // empty function }; }; // namespace sperr diff --git a/src/SPERR/include/SPECK3D_INT_ENC.h b/src/SPERR/include/SPECK3D_INT_ENC.h index 2b73fa67..2ccb25e2 100644 --- a/src/SPERR/include/SPECK3D_INT_ENC.h +++ b/src/SPERR/include/SPECK3D_INT_ENC.h @@ -3,8 +3,6 @@ #include "SPECK3D_INT.h" -#include - namespace sperr { // diff --git a/src/SPERR/include/SPECK_INT.h b/src/SPERR/include/SPECK_INT.h index 412d8e6c..1abd2a8a 100644 --- a/src/SPERR/include/SPECK_INT.h +++ b/src/SPERR/include/SPECK_INT.h @@ -44,7 +44,7 @@ class SPECK_INT { // Note: `speck_int_get_num_bitplanes()` is provided as a free-standing helper function (above). // // Retrieve the number of useful bits of a SPECK bitstream from its header. - auto get_speck_bits(const void*) const -> uint64_t; + auto get_speck_num_bits(const void*) const -> uint64_t; // Retrieve the number of bytes of a SPECK bitstream (including header) from its header. 
auto get_stream_full_len(const void*) const -> uint64_t; diff --git a/src/SPERR/include/SPERR3D_OMP_D.h b/src/SPERR/include/SPERR3D_OMP_D.h index 81117676..6b7d56b1 100644 --- a/src/SPERR/include/SPERR3D_OMP_D.h +++ b/src/SPERR/include/SPERR3D_OMP_D.h @@ -24,6 +24,7 @@ class SPERR3D_OMP_D { auto decompress(const void* bitstream, bool multi_res = false) -> RTNType; auto view_decoded_data() const -> const sperr::vecd_type&; + auto view_hierarchy() const -> const std::vector&; auto release_decoded_data() -> sperr::vecd_type&&; auto release_hierarchy() -> std::vector&&; diff --git a/src/SPERR/include/SPERR3D_Stream_Tools.h b/src/SPERR/include/SPERR3D_Stream_Tools.h index 0fb03779..11cb65ae 100644 --- a/src/SPERR/include/SPERR3D_Stream_Tools.h +++ b/src/SPERR/include/SPERR3D_Stream_Tools.h @@ -36,7 +36,7 @@ class SPERR3D_Stream_Tools { // Function that reads in portions of a file only to facilitate progressive access. // (This function does not read the whole file.) - auto progressive_read(std::string filename, unsigned pct) const -> vec8_type; + auto progressive_read(const std::string& filename, unsigned pct) const -> vec8_type; // Function that truncates a bitstream in the memory to facilitate progressive access. // Note on `stream_len`: it does not need to be the full length of the original bitstream, @@ -56,8 +56,9 @@ class SPERR3D_Stream_Tools { // Given the header of a bitstream and a desired percentage to truncate, return an // updated header and a list of {offset, len} to access. // Note: this function assumes that the header is complete. 
- auto m_progressive_helper(const void* header_buf, size_t buf_len, unsigned pct) const - -> std::tuple>; + auto m_progressive_helper(const void* header_buf, + size_t buf_len, + unsigned pct) const -> std::tuple>; }; } // End of namespace sperr diff --git a/src/SPERR/include/sperr_helper.h b/src/SPERR/include/sperr_helper.h index b15f5734..befca0b0 100644 --- a/src/SPERR/include/sperr_helper.h +++ b/src/SPERR/include/sperr_helper.h @@ -37,7 +37,7 @@ using dims_type = std::array; // // Helper classes // -enum class SigType : unsigned char { Insig, Sig, NewlySig, Dunno, Garbage }; +enum class SigType : unsigned char { Insig, Sig, Dunno, Garbage }; enum class UINTType : unsigned char { UINT8, UINT16, UINT32, UINT64 }; @@ -136,8 +136,9 @@ auto read_whole_file(std::string filename) -> vec_type; // Read sections of a file (extract sections from a memory buffer), and append those sections // to the end of `dst`. The read from file version avoids reading not-requested sections. // The sections are defined by pairs of offsets and lengths, both in number of bytes. 
-auto read_sections(std::string filename, const std::vector& sections, vec8_type& dst) - -> RTNType; +auto read_sections(std::string filename, + const std::vector& sections, + vec8_type& dst) -> RTNType; auto extract_sections(const void* buf, size_t buf_len, const std::vector& sections, diff --git a/src/SPERR/src/Bitmask.cpp b/src/SPERR/src/Bitmask.cpp index 036274e3..cebf6cd9 100644 --- a/src/SPERR/src/Bitmask.cpp +++ b/src/SPERR/src/Bitmask.cpp @@ -1,13 +1,18 @@ #include "Bitmask.h" #include +#include #include +#if __cplusplus >= 202002L +#include +#endif + sperr::Bitmask::Bitmask(size_t nbits) { if (nbits > 0) { auto num_longs = nbits / 64; - if (nbits % 64 != 0) + if (nbits - num_longs * 64 != 0) num_longs++; m_buf.assign(num_longs, 0); m_num_bits = nbits; @@ -22,7 +27,7 @@ auto sperr::Bitmask::size() const -> size_t void sperr::Bitmask::resize(size_t nbits) { auto num_longs = nbits / 64; - if (nbits % 64 != 0) + if (nbits - num_longs * 64 != 0) num_longs++; m_buf.resize(num_longs, 0); m_num_bits = nbits; @@ -35,11 +40,81 @@ auto sperr::Bitmask::rlong(size_t idx) const -> uint64_t auto sperr::Bitmask::rbit(size_t idx) const -> bool { - auto word = m_buf[idx / 64]; - word &= uint64_t{1} << (idx % 64); + auto div = idx / 64; + auto rem = idx - div * 64; + auto word = m_buf[div]; + word &= uint64_t{1} << rem; return (word != 0); } +template +auto sperr::Bitmask::has_true(size_t start, size_t len) const -> int64_t +{ + auto long_idx = start / 64; + auto processed_bits = int64_t{0}; + auto word = m_buf[long_idx]; + auto answer = uint64_t{0}; + + // Collect the remaining bits from the start long. 
+ auto begin_idx = start - long_idx * 64; + auto nbits = std::min(size_t{64}, begin_idx + len); + for (auto i = begin_idx; i < nbits; i++) { + answer |= word & (uint64_t{1} << i); + if constexpr (Position) { + if (answer != 0) + return processed_bits; + } + processed_bits++; + } + if constexpr (!Position) { + if (answer != 0) + return 1; + } + + // Examine the subsequent full longs. + while (processed_bits + 64 <= len) { + word = m_buf[++long_idx]; + if (word) { + if constexpr (Position) { +#if __cplusplus >= 202002L + int64_t i = std::countr_zero(word); + return processed_bits + i; +#else + for (int64_t i = 0; i < 64; i++) + if (word & (uint64_t{1} << i)) + return processed_bits + i; +#endif + } + else + return 1; + } + processed_bits += 64; + } + + // Examine the remaining bits + if (processed_bits < len) { + nbits = len - processed_bits; + assert(nbits < 64); + word = m_buf[++long_idx]; + answer = 0; + for (int64_t i = 0; i < nbits; i++) { + answer |= word & (uint64_t{1} << i); + if constexpr (Position) { + if (answer != 0) + return processed_bits + i; + } + } + if constexpr (!Position) { + if (answer != 0) + return 1; + } + } + + return -1; +} +template auto sperr::Bitmask::has_true(size_t, size_t) const -> int64_t; +template auto sperr::Bitmask::has_true(size_t, size_t) const -> int64_t; + auto sperr::Bitmask::count_true() const -> size_t { size_t counter = 0; @@ -48,11 +123,15 @@ auto sperr::Bitmask::count_true() const -> size_t // Note that unused bits in the last long are not guaranteed to be all 0's. 
for (size_t i = 0; i < m_buf.size() - 1; i++) { - const auto val = m_buf[i]; + auto val = m_buf[i]; +#if __cplusplus >= 202002L + counter += std::popcount(val); +#else if (val != 0) { for (size_t j = 0; j < 64; j++) counter += ((val >> j) & uint64_t{1}); } +#endif } const auto val = m_buf.back(); if (val != 0) { @@ -71,7 +150,7 @@ void sperr::Bitmask::wlong(size_t idx, uint64_t value) void sperr::Bitmask::wbit(size_t idx, bool bit) { const auto wstart = idx / 64; - const auto mask = uint64_t{1} << (idx % 64); + const auto mask = uint64_t{1} << (idx - wstart * 64); auto word = m_buf[wstart]; if (bit) @@ -84,7 +163,7 @@ void sperr::Bitmask::wbit(size_t idx, bool bit) void sperr::Bitmask::wtrue(size_t idx) { const auto wstart = idx / 64; - const auto mask = uint64_t{1} << (idx % 64); + const auto mask = uint64_t{1} << (idx - wstart * 64); auto word = m_buf[wstart]; word |= mask; @@ -94,7 +173,7 @@ void sperr::Bitmask::wtrue(size_t idx) void sperr::Bitmask::wfalse(size_t idx) { const auto wstart = idx / 64; - const auto mask = uint64_t{1} << (idx % 64); + const auto mask = uint64_t{1} << (idx - wstart * 64); auto word = m_buf[wstart]; word &= ~mask; @@ -122,7 +201,7 @@ void sperr::Bitmask::use_bitstream(const void* p) std::copy(pu64, pu64 + m_buf.size(), m_buf.begin()); } -#if defined __cpp_lib_three_way_comparison && defined __cpp_impl_three_way_comparison +#if __cplusplus >= 202002L && defined __cpp_lib_three_way_comparison auto sperr::Bitmask::operator<=>(const Bitmask& rhs) const noexcept { auto cmp = m_num_bits <=> rhs.m_num_bits; diff --git a/src/SPERR/src/Bitstream.cpp b/src/SPERR/src/Bitstream.cpp index 909985f9..cabff558 100644 --- a/src/SPERR/src/Bitstream.cpp +++ b/src/SPERR/src/Bitstream.cpp @@ -29,7 +29,7 @@ void sperr::Bitstream::reserve(size_t nbits) if (nbits > m_buf.size() * 64) { // Number of longs that's absolutely needed. 
auto num_longs = nbits / 64; - if (nbits % 64 != 0) + if (num_longs * 64 < nbits) num_longs++; const auto dist = std::distance(m_buf.begin(), m_itr); @@ -54,8 +54,9 @@ auto sperr::Bitstream::rtell() const -> size_t void sperr::Bitstream::rseek(size_t offset) { - m_itr = m_buf.begin() + offset / 64; - const auto rem = offset % 64; + size_t div = offset / 64; + size_t rem = offset - div * 64; + m_itr = m_buf.begin() + div; if (rem) { m_buffer = *m_itr >> rem; ++m_itr; @@ -90,8 +91,9 @@ auto sperr::Bitstream::wtell() const -> size_t void sperr::Bitstream::wseek(size_t offset) { - m_itr = m_buf.begin() + offset / 64; - const auto rem = offset % 64; + size_t div = offset / 64; + size_t rem = offset - div * 64; + m_itr = m_buf.begin() + div; if (rem) { m_buffer = *m_itr; m_buffer &= (uint64_t{1} << rem) - 1; @@ -114,7 +116,7 @@ void sperr::Bitstream::wbit(bool bit) #endif { if (m_itr == m_buf.end()) { // allocate memory if necessary. - const auto dist = m_buf.size(); + auto dist = m_buf.size(); m_buf.resize(std::max(size_t{1}, dist) * 2 - dist / 2); // use a growth factor of 1.5 m_itr = m_buf.begin() + dist; } @@ -129,7 +131,7 @@ void sperr::Bitstream::flush() { if (m_bits) { // only really flush when there are remaining bits. 
if (m_itr == m_buf.end()) { - const auto dist = m_buf.size(); + auto dist = m_buf.size(); m_buf.resize(std::max(size_t{1}, dist) * 2 - dist / 2); // use a growth factor of 1.5 m_itr = m_buf.begin() + dist; } @@ -144,6 +146,7 @@ void sperr::Bitstream::flush() void sperr::Bitstream::write_bitstream(void* p, size_t num_bits) const { assert(num_bits <= m_buf.size() * 64); + const auto num_longs = num_bits / 64; auto rem_bytes = num_bits / 8 - num_longs * sizeof(uint64_t); if (num_bits % 8 != 0) @@ -162,8 +165,9 @@ void sperr::Bitstream::write_bitstream(void* p, size_t num_bits) const auto sperr::Bitstream::get_bitstream(size_t num_bits) const -> std::vector { assert(num_bits <= m_buf.size() * 64); + auto num_bytes = num_bits / 8; - if (num_bits % 8 != 0) + if (num_bits - num_bytes * 8 != 0) num_bytes++; auto tmp = std::vector(num_bytes); diff --git a/src/SPERR/src/Conditioner.cpp b/src/SPERR/src/Conditioner.cpp index 1031f4d7..8d637554 100644 --- a/src/SPERR/src/Conditioner.cpp +++ b/src/SPERR/src/Conditioner.cpp @@ -63,8 +63,9 @@ auto sperr::Conditioner::condition(vecd_type& buf, dims_type dims) -> condi_type return header; } -auto sperr::Conditioner::inverse_condition(vecd_type& buf, dims_type dims, condi_type header) - -> RTNType +auto sperr::Conditioner::inverse_condition(vecd_type& buf, + dims_type dims, + condi_type header) -> RTNType { // unpack meta bit fields auto meta = sperr::unpack_8_booleans(header[0]); diff --git a/src/SPERR/src/SPECK1D_INT_DEC.cpp b/src/SPERR/src/SPECK1D_INT_DEC.cpp index 8fefd5be..8d73e0e1 100644 --- a/src/SPERR/src/SPECK1D_INT_DEC.cpp +++ b/src/SPERR/src/SPECK1D_INT_DEC.cpp @@ -5,6 +5,10 @@ #include // std::memcpy() #include +#if __cplusplus >= 202002L +#include +#endif + template void sperr::SPECK1D_INT_DEC::m_sorting_pass() { @@ -13,7 +17,15 @@ void sperr::SPECK1D_INT_DEC::m_sorting_pass() const auto bits_x64 = m_LIP_mask.size() - m_LIP_mask.size() % 64; for (size_t i = 0; i < bits_x64; i += 64) { - const auto value = 
m_LIP_mask.rlong(i); + auto value = m_LIP_mask.rlong(i); + +#if __cplusplus >= 202002L + while (value) { + size_t j = std::countr_zero(value); + m_process_P(i + j, j, true); + value &= value - 1; + } +#else if (value != 0) { for (size_t j = 0; j < 64; j++) { if ((value >> j) & uint64_t{1}) { @@ -22,6 +34,7 @@ void sperr::SPECK1D_INT_DEC::m_sorting_pass() } } } +#endif } for (auto i = bits_x64; i < m_LIP_mask.size(); i++) { if (m_LIP_mask.rbit(i)) { diff --git a/src/SPERR/src/SPECK1D_INT_ENC.cpp b/src/SPERR/src/SPECK1D_INT_ENC.cpp index a8c36dc8..45373b04 100644 --- a/src/SPERR/src/SPECK1D_INT_ENC.cpp +++ b/src/SPERR/src/SPECK1D_INT_ENC.cpp @@ -5,6 +5,10 @@ #include // std::memcpy() #include +#if __cplusplus >= 202002L +#include +#endif + template void sperr::SPECK1D_INT_ENC::m_sorting_pass() { @@ -13,7 +17,15 @@ void sperr::SPECK1D_INT_ENC::m_sorting_pass() const auto bits_x64 = m_LIP_mask.size() - m_LIP_mask.size() % 64; for (size_t i = 0; i < bits_x64; i += 64) { - const auto value = m_LIP_mask.rlong(i); + auto value = m_LIP_mask.rlong(i); + +#if __cplusplus >= 202002L + while (value) { + size_t j = std::countr_zero(value); + m_process_P(i + j, SigType::Dunno, j, true); + value &= value - 1; + } +#else if (value != 0) { for (size_t j = 0; j < 64; j++) { if ((value >> j) & uint64_t{1}) { @@ -22,6 +34,7 @@ void sperr::SPECK1D_INT_ENC::m_sorting_pass() } } } +#endif } for (auto i = bits_x64; i < m_LIP_mask.size(); i++) { if (m_LIP_mask.rbit(i)) { @@ -49,9 +62,6 @@ void sperr::SPECK1D_INT_ENC::m_process_S(size_t idx1, size_t& counter, bool output) { - // Significance type cannot be NewlySig! 
- assert(sig != SigType::NewlySig); - auto& set = m_LIS[idx1][idx2]; // Strategy to decide the significance of this set; @@ -88,7 +98,6 @@ template void sperr::SPECK1D_INT_ENC::m_process_P(size_t idx, SigType sig, size_t& counter, bool output) { // Decide the significance of this pixel - assert(sig != SigType::NewlySig); bool is_sig = false; if (sig == SigType::Dunno) is_sig = (m_coeff_buf[idx] >= m_threshold); diff --git a/src/SPERR/src/SPECK2D_INT.cpp b/src/SPERR/src/SPECK2D_INT.cpp index 077f3bdf..fdb69e3e 100644 --- a/src/SPERR/src/SPECK2D_INT.cpp +++ b/src/SPERR/src/SPECK2D_INT.cpp @@ -3,6 +3,10 @@ #include #include +#if __cplusplus >= 202002L +#include +#endif + template void sperr::SPECK2D_INT::m_sorting_pass() { @@ -11,7 +15,15 @@ void sperr::SPECK2D_INT::m_sorting_pass() const auto bits_x64 = m_LIP_mask.size() - m_LIP_mask.size() % 64; for (size_t i = 0; i < bits_x64; i += 64) { - const auto value = m_LIP_mask.rlong(i); + auto value = m_LIP_mask.rlong(i); + +#if __cplusplus >= 202002L + while (value) { + size_t j = std::countr_zero(value); + m_process_P(i + j, j, true); + value &= value - 1; + } +#else if (value != 0) { for (size_t j = 0; j < 64; j++) { if ((value >> j) & uint64_t{1}) { @@ -20,6 +32,7 @@ void sperr::SPECK2D_INT::m_sorting_pass() } } } +#endif } for (auto i = bits_x64; i < m_LIP_mask.size(); i++) { if (m_LIP_mask.rbit(i)) { diff --git a/src/SPERR/src/SPECK3D_INT.cpp b/src/SPERR/src/SPECK3D_INT.cpp index 694c4c13..763db790 100644 --- a/src/SPERR/src/SPECK3D_INT.cpp +++ b/src/SPERR/src/SPECK3D_INT.cpp @@ -5,6 +5,10 @@ #include #include +#if __cplusplus >= 202002L +#include +#endif + template void sperr::SPECK3D_INT::m_clean_LIS() { @@ -100,13 +104,22 @@ void sperr::SPECK3D_INT::m_sorting_pass() const auto bits_x64 = m_LIP_mask.size() - m_LIP_mask.size() % 64; for (size_t i = 0; i < bits_x64; i += 64) { - const auto value = m_LIP_mask.rlong(i); + auto value = m_LIP_mask.rlong(i); + +#if __cplusplus >= 202002L + while (value) { + auto j = 
std::countr_zero(value); + m_process_P_lite(i + j); + value &= value - 1; + } +#else if (value != 0) { for (size_t j = 0; j < 64; j++) { if ((value >> j) & uint64_t{1}) m_process_P_lite(i + j); } } +#endif } for (auto i = bits_x64; i < m_LIP_mask.size(); i++) { if (m_LIP_mask.rbit(i)) @@ -127,27 +140,73 @@ void sperr::SPECK3D_INT::m_sorting_pass() template void sperr::SPECK3D_INT::m_code_S(size_t idx1, size_t idx2) { - auto [subsets, next_lev] = m_partition_S_XYZ(m_LIS[idx1][idx2], uint16_t(idx1)); - - // Since some subsets could be empty, let's put empty sets at the end. - const auto set_end = - std::remove_if(subsets.begin(), subsets.end(), [](auto& s) { return s.num_elem() == 0; }); - - // Counter for the number of discovered significant sets. - // If no significant subset is found yet, and we're already looking at the last subset, - // then we know that this last subset IS significant. - size_t sig_counter = 0; - for (auto it = subsets.begin(); it != set_end; ++it) { - bool need_decide = (sig_counter != 0 || it + 1 != set_end); - if (it->num_elem() == 1) { - auto idx = it->start_z * m_dims[0] * m_dims[1] + it->start_y * m_dims[0] + it->start_x; - m_LIP_mask.wtrue(idx); - m_process_P(idx, it->get_morton(), sig_counter, need_decide); - } - else { - m_LIS[next_lev].emplace_back(*it); - const auto newidx2 = m_LIS[next_lev].size() - 1; - m_process_S(next_lev, newidx2, sig_counter, need_decide); + auto set = m_LIS[idx1][idx2]; + + if (set.length_x == 2 && set.length_y == 2 && set.length_z == 2) { // tail ellison case + size_t sig_counter = 0; + bool need_decide = true; + + // Element (0, 0, 0) + const auto id = set.start_z * m_dims[0] * m_dims[1] + set.start_y * m_dims[0] + set.start_x; + auto mort = set.get_morton(); + m_LIP_mask.wtrue(id); + m_process_P(id, mort, sig_counter, need_decide); + + // Element (1, 0, 0) + auto id2 = id + 1; + m_LIP_mask.wtrue(id2); + m_process_P(id2, ++mort, sig_counter, need_decide); + + // Element (0, 1, 0) + id2 = id + m_dims[0]; + 
m_LIP_mask.wtrue(id2); + m_process_P(id2, ++mort, sig_counter, need_decide); + + // Element (1, 1, 0) + m_LIP_mask.wtrue(++id2); + m_process_P(id2, ++mort, sig_counter, need_decide); + + // Element (0, 0, 1) + id2 = id + m_dims[0] * m_dims[1]; + m_LIP_mask.wtrue(id2); + m_process_P(id2, ++mort, sig_counter, need_decide); + + // Element (1, 0, 1) + m_LIP_mask.wtrue(++id2); + m_process_P(id2, ++mort, sig_counter, need_decide); + + // Element (0, 1, 1) + id2 = id + m_dims[0] * (m_dims[1] + 1); + m_LIP_mask.wtrue(id2); + m_process_P(id2, ++mort, sig_counter, need_decide); + + // Element (1, 1, 1) + need_decide = sig_counter != 0; + m_LIP_mask.wtrue(++id2); + m_process_P(id2, ++mort, sig_counter, need_decide); + } + else { // normal recursion case + // Get its 8 subsets, and move the empty ones to the end. + auto [subsets, next_lev] = m_partition_S_XYZ(set, uint16_t(idx1)); + const auto set_end = + std::remove_if(subsets.begin(), subsets.end(), [](auto& s) { return s.num_elem() == 0; }); + + // Counter for the number of discovered significant sets. + // If no significant subset is found yet, and we're already looking at the last subset, + // then we know that this last subset IS significant. 
+ size_t sig_counter = 0; + for (auto it = subsets.begin(); it != set_end; ++it) { + bool need_decide = (sig_counter != 0 || it + 1 != set_end); + if (it->num_elem() == 1) { + auto idx = it->start_z * m_dims[0] * m_dims[1] + it->start_y * m_dims[0] + it->start_x; + m_LIP_mask.wtrue(idx); + m_process_P(idx, it->get_morton(), sig_counter, need_decide); + } + else { + m_LIS[next_lev].emplace_back(*it); + const auto newidx2 = m_LIS[next_lev].size() - 1; + m_process_S(next_lev, newidx2, sig_counter, need_decide); + } } } } @@ -171,15 +230,13 @@ auto sperr::SPECK3D_INT::m_partition_S_XYZ(Set3D set, uint16_t lev) const auto subsets = std::tuple, uint16_t>(); std::get<1>(subsets) = lev; - constexpr auto offsets = std::array{1, 2, 4}; auto morton_offset = set.get_morton(); // // The actual figuring out where it starts/ends part... // // subset (0, 0, 0) - constexpr auto idx0 = 0 * offsets[0] + 0 * offsets[1] + 0 * offsets[2]; - auto& sub0 = std::get<0>(subsets)[idx0]; + auto& sub0 = std::get<0>(subsets)[0]; sub0.set_morton(morton_offset); sub0.start_x = set.start_x; sub0.start_y = set.start_y; @@ -189,8 +246,7 @@ auto sperr::SPECK3D_INT::m_partition_S_XYZ(Set3D set, uint16_t lev) const sub0.length_z = split_z[0]; // subset (1, 0, 0) - constexpr auto idx1 = 1 * offsets[0] + 0 * offsets[1] + 0 * offsets[2]; - auto& sub1 = std::get<0>(subsets)[idx1]; + auto& sub1 = std::get<0>(subsets)[1]; morton_offset += sub0.num_elem(); sub1.set_morton(morton_offset); sub1.start_x = set.start_x + split_x[0]; @@ -201,8 +257,7 @@ auto sperr::SPECK3D_INT::m_partition_S_XYZ(Set3D set, uint16_t lev) const sub1.length_z = split_z[0]; // subset (0, 1, 0) - constexpr auto idx2 = 0 * offsets[0] + 1 * offsets[1] + 0 * offsets[2]; - auto& sub2 = std::get<0>(subsets)[idx2]; + auto& sub2 = std::get<0>(subsets)[2]; morton_offset += sub1.num_elem(); sub2.set_morton(morton_offset); sub2.start_x = set.start_x; @@ -213,8 +268,7 @@ auto sperr::SPECK3D_INT::m_partition_S_XYZ(Set3D set, uint16_t lev) const 
sub2.length_z = split_z[0]; // subset (1, 1, 0) - constexpr auto idx3 = 1 * offsets[0] + 1 * offsets[1] + 0 * offsets[2]; - auto& sub3 = std::get<0>(subsets)[idx3]; + auto& sub3 = std::get<0>(subsets)[3]; morton_offset += sub2.num_elem(); sub3.set_morton(morton_offset); sub3.start_x = set.start_x + split_x[0]; @@ -225,8 +279,7 @@ auto sperr::SPECK3D_INT::m_partition_S_XYZ(Set3D set, uint16_t lev) const sub3.length_z = split_z[0]; // subset (0, 0, 1) - constexpr auto idx4 = 0 * offsets[0] + 0 * offsets[1] + 1 * offsets[2]; - auto& sub4 = std::get<0>(subsets)[idx4]; + auto& sub4 = std::get<0>(subsets)[4]; morton_offset += sub3.num_elem(); sub4.set_morton(morton_offset); sub4.start_x = set.start_x; @@ -237,8 +290,7 @@ auto sperr::SPECK3D_INT::m_partition_S_XYZ(Set3D set, uint16_t lev) const sub4.length_z = split_z[1]; // subset (1, 0, 1) - constexpr auto idx5 = 1 * offsets[0] + 0 * offsets[1] + 1 * offsets[2]; - auto& sub5 = std::get<0>(subsets)[idx5]; + auto& sub5 = std::get<0>(subsets)[5]; morton_offset += sub4.num_elem(); sub5.set_morton(morton_offset); sub5.start_x = set.start_x + split_x[0]; @@ -249,8 +301,7 @@ auto sperr::SPECK3D_INT::m_partition_S_XYZ(Set3D set, uint16_t lev) const sub5.length_z = split_z[1]; // subset (0, 1, 1) - constexpr auto idx6 = 0 * offsets[0] + 1 * offsets[1] + 1 * offsets[2]; - auto& sub6 = std::get<0>(subsets)[idx6]; + auto& sub6 = std::get<0>(subsets)[6]; morton_offset += sub5.num_elem(); sub6.set_morton(morton_offset); sub6.start_x = set.start_x; @@ -261,8 +312,7 @@ auto sperr::SPECK3D_INT::m_partition_S_XYZ(Set3D set, uint16_t lev) const sub6.length_z = split_z[1]; // subset (1, 1, 1) - constexpr auto idx7 = 1 * offsets[0] + 1 * offsets[1] + 1 * offsets[2]; - auto& sub7 = std::get<0>(subsets)[idx7]; + auto& sub7 = std::get<0>(subsets)[7]; morton_offset += sub6.num_elem(); sub7.set_morton(morton_offset); sub7.start_x = set.start_x + split_x[0]; diff --git a/src/SPERR/src/SPECK_INT.cpp b/src/SPERR/src/SPECK_INT.cpp index 
f1f4dc9e..fa04a877 100644 --- a/src/SPERR/src/SPECK_INT.cpp +++ b/src/SPERR/src/SPECK_INT.cpp @@ -5,6 +5,10 @@ #include #include +#if __cplusplus >= 202002L +#include +#endif + // // Free-standing helper function // @@ -54,7 +58,7 @@ void sperr::SPECK_INT::set_budget(size_t bud) } template -auto sperr::SPECK_INT::get_speck_bits(const void* buf) const -> uint64_t +auto sperr::SPECK_INT::get_speck_num_bits(const void* buf) const -> uint64_t { // Given the header definition, directly retrieve the value stored in bytes 1--9. const auto* const ptr = static_cast(buf); @@ -66,7 +70,7 @@ auto sperr::SPECK_INT::get_speck_bits(const void* buf) const -> uint64_t template auto sperr::SPECK_INT::get_stream_full_len(const void* buf) const -> uint64_t { - auto num_bits = get_speck_bits(buf); + auto num_bits = get_speck_num_bits(buf); while (num_bits % 8 != 0) ++num_bits; return (header_size + num_bits / 8); @@ -311,7 +315,17 @@ void sperr::SPECK_INT::m_refinement_pass_encode() const auto bits_x64 = m_LSP_mask.size() - m_LSP_mask.size() % 64; for (size_t i = 0; i < bits_x64; i += 64) { // Evaluate 64 bits at a time. - const auto value = m_LSP_mask.rlong(i); + auto value = m_LSP_mask.rlong(i); + +#if __cplusplus >= 202002L + while (value) { + auto j = std::countr_zero(value); + const bool o1 = m_coeff_buf[i + j] >= m_threshold; + m_coeff_buf[i + j] -= tmp1[o1]; + m_bit_buffer.wbit(o1); + value &= value - 1; + } +#else if (value != 0) { for (size_t j = 0; j < 64; j++) { if ((value >> j) & uint64_t{1}) { @@ -321,6 +335,7 @@ void sperr::SPECK_INT::m_refinement_pass_encode() } } } +#endif } for (auto i = bits_x64; i < m_LSP_mask.size(); i++) { // Evaluate the remaining bits. 
if (m_LSP_mask.rbit(i)) { @@ -357,7 +372,20 @@ void sperr::SPECK_INT::m_refinement_pass_decode() if (m_threshold >= uint_type{2}) { // <-- Point 1 const auto half_t = m_threshold / uint_type{2}; for (size_t i = 0; i < bits_x64; i += 64) { // <-- Point 2 - const auto value = m_LSP_mask.rlong(i); + auto value = m_LSP_mask.rlong(i); + +#if __cplusplus >= 202002L + while (value) { + auto j = std::countr_zero(value); + if (m_bit_buffer.rbit()) + m_coeff_buf[i + j] += half_t; + else + m_coeff_buf[i + j] -= half_t; + if (++read_pos == m_avail_bits) // <-- Point 3 + goto INITIALIZE_NEWLY_FOUND_POINTS_LABEL; // <-- Point 4 + value &= value - 1; + } +#else if (value != 0) { for (size_t j = 0; j < 64; j++) { if ((value >> j) & uint64_t{1}) { @@ -370,6 +398,7 @@ void sperr::SPECK_INT::m_refinement_pass_decode() } } } +#endif } for (auto i = bits_x64; i < m_LSP_mask.size(); i++) { // <-- Point 2 if (m_LSP_mask.rbit(i)) { @@ -381,10 +410,21 @@ void sperr::SPECK_INT::m_refinement_pass_decode() goto INITIALIZE_NEWLY_FOUND_POINTS_LABEL; // <-- Point 4 } } - } // Finish the case where `m_threshold >= 2`. + } // Finish the case where `m_threshold >= 2`. else { // Start the case where `m_threshold == 1`. 
for (size_t i = 0; i < bits_x64; i += 64) { - const auto value = m_LSP_mask.rlong(i); + auto value = m_LSP_mask.rlong(i); + +#if __cplusplus >= 202002L + while (value) { + auto j = std::countr_zero(value); + if (m_bit_buffer.rbit()) + ++(m_coeff_buf[i + j]); + if (++read_pos == m_avail_bits) + goto INITIALIZE_NEWLY_FOUND_POINTS_LABEL; + value &= value - 1; + } +#else for (size_t j = 0; j < 64; j++) { if ((value >> j) & uint64_t{1}) { if (m_bit_buffer.rbit()) @@ -393,6 +433,7 @@ void sperr::SPECK_INT::m_refinement_pass_decode() goto INITIALIZE_NEWLY_FOUND_POINTS_LABEL; } } +#endif } for (auto i = bits_x64; i < m_LSP_mask.size(); i++) { if (m_LSP_mask.rbit(i)) { diff --git a/src/SPERR/src/SPERR3D_OMP_C.cpp b/src/SPERR/src/SPERR3D_OMP_C.cpp index 22e71509..3ac128ab 100644 --- a/src/SPERR/src/SPERR3D_OMP_C.cpp +++ b/src/SPERR/src/SPERR3D_OMP_C.cpp @@ -259,7 +259,9 @@ auto sperr::SPERR3D_OMP_C::m_gather_chunk(const T* vol, // Will be subject to Named Return Value Optimization. return chunk_buf; } -template auto sperr::SPERR3D_OMP_C::m_gather_chunk(const float*, dims_type, std::array) - -> vecd_type; -template auto sperr::SPERR3D_OMP_C::m_gather_chunk(const double*, dims_type, std::array) - -> vecd_type; +template auto sperr::SPERR3D_OMP_C::m_gather_chunk(const float*, + dims_type, + std::array) -> vecd_type; +template auto sperr::SPERR3D_OMP_C::m_gather_chunk(const double*, + dims_type, + std::array) -> vecd_type; diff --git a/src/SPERR/src/SPERR3D_OMP_D.cpp b/src/SPERR/src/SPERR3D_OMP_D.cpp index 0546191c..a487bff0 100644 --- a/src/SPERR/src/SPERR3D_OMP_D.cpp +++ b/src/SPERR/src/SPERR3D_OMP_D.cpp @@ -144,6 +144,11 @@ auto sperr::SPERR3D_OMP_D::release_hierarchy() -> std::vector&& return std::move(m_hierarchy); } +auto sperr::SPERR3D_OMP_D::view_hierarchy() const -> const std::vector& +{ + return m_hierarchy; +} + auto sperr::SPERR3D_OMP_D::view_decoded_data() const -> const sperr::vecd_type& { return m_vol_buf; diff --git a/src/SPERR/src/SPERR3D_Stream_Tools.cpp 
b/src/SPERR/src/SPERR3D_Stream_Tools.cpp index 608045a4..2017b1ad 100644 --- a/src/SPERR/src/SPERR3D_Stream_Tools.cpp +++ b/src/SPERR/src/SPERR3D_Stream_Tools.cpp @@ -104,8 +104,8 @@ auto sperr::SPERR3D_Stream_Tools::get_stream_header(const void* p) const -> SPER return header; } -auto sperr::SPERR3D_Stream_Tools::progressive_read(std::string filename, unsigned pct) const - -> vec8_type +auto sperr::SPERR3D_Stream_Tools::progressive_read(const std::string& filename, + unsigned pct) const -> vec8_type { // Read the header of this bitstream. auto vec20 = sperr::read_n_bytes(filename, 20); diff --git a/src/SPERR/src/SPERR_C_API.cpp b/src/SPERR/src/SPERR_C_API.cpp index 867fb681..0628372f 100644 --- a/src/SPERR/src/SPERR_C_API.cpp +++ b/src/SPERR/src/SPERR_C_API.cpp @@ -8,15 +8,15 @@ #include "SPERR3D_Stream_Tools.h" -int C_API::sperr_comp_2d(const void* src, - int is_float, - size_t dimx, - size_t dimy, - int mode, - double quality, - int out_inc_header, - void** dst, - size_t* dst_len) +auto C_API::sperr_comp_2d(const void* src, + int is_float, + size_t dimx, + size_t dimy, + int mode, + double quality, + int out_inc_header, + void** dst, + size_t* dst_len) -> int { // Examine if `dst` is pointing to a NULL pointer if (*dst != nullptr) @@ -95,12 +95,12 @@ int C_API::sperr_comp_2d(const void* src, return 0; } -int C_API::sperr_decomp_2d(const void* src, - size_t src_len, - int output_float, - size_t dimx, - size_t dimy, - void** dst) +auto C_API::sperr_decomp_2d(const void* src, + size_t src_len, + int output_float, + size_t dimx, + size_t dimy, + void** dst) -> int { // Examine if `dst` is pointing to a NULL pointer if (*dst != nullptr) @@ -153,19 +153,19 @@ void C_API::sperr_parse_header(const void* src, *dimz = dims[2]; } -int C_API::sperr_comp_3d(const void* src, - int is_float, - size_t dimx, - size_t dimy, - size_t dimz, - size_t chunk_x, - size_t chunk_y, - size_t chunk_z, - int mode, - double quality, - size_t nthreads, - void** dst, - size_t* dst_len) +auto 
C_API::sperr_comp_3d(const void* src, + int is_float, + size_t dimx, + size_t dimy, + size_t dimz, + size_t chunk_x, + size_t chunk_y, + size_t chunk_z, + int mode, + double quality, + size_t nthreads, + void** dst, + size_t* dst_len) -> int { // Examine if `dst` is pointing to a NULL pointer if (*dst != nullptr) @@ -215,14 +215,14 @@ int C_API::sperr_comp_3d(const void* src, return 0; } -int C_API::sperr_decomp_3d(const void* src, - size_t src_len, - int output_float, - size_t nthreads, - size_t* dimx, - size_t* dimy, - size_t* dimz, - void** dst) +auto C_API::sperr_decomp_3d(const void* src, + size_t src_len, + int output_float, + size_t nthreads, + size_t* dimx, + size_t* dimy, + size_t* dimz, + void** dst) -> int { // Examine if `dst` is pointing to a NULL pointer. if (*dst != nullptr) @@ -257,11 +257,11 @@ int C_API::sperr_decomp_3d(const void* src, return 0; } -int C_API::sperr_trunc_3d(const void* src, - size_t src_len, - unsigned pct, - void** dst, - size_t* dst_len) +auto C_API::sperr_trunc_3d(const void* src, + size_t src_len, + unsigned pct, + void** dst, + size_t* dst_len) -> int { if (*dst != nullptr) return 1; diff --git a/src/SPERR/src/notes_on_clang-tidy b/src/SPERR/src/notes_on_clang-tidy index 8d308687..59442704 100644 --- a/src/SPERR/src/notes_on_clang-tidy +++ b/src/SPERR/src/notes_on_clang-tidy @@ -5,4 +5,4 @@ cmake -DCMAKE_EXPORT_COMPILE_COMMANDS=ON . 2. 
I used the following clang-tidy options: -clang-tidy-10 ../src/SPECK3D.cpp -checks=-*,performance-*,portability-*,modernize-*,clang-analyzer-*,-modernize-avoid-c-arrays,-modernize-use-nodiscard -header-filter=../include/* -fix +clang-tidy ../src/SPECK3D.cpp -checks=-*,performance-*,portability-*,modernize-*,clang-analyzer-*,-modernize-avoid-c-arrays,-modernize-use-nodiscard -header-filter=../include/* -fix diff --git a/src/SPERR/src/sperr_helper.cpp b/src/SPERR/src/sperr_helper.cpp index 6737495b..eb45d23f 100644 --- a/src/SPERR/src/sperr_helper.cpp +++ b/src/SPERR/src/sperr_helper.cpp @@ -90,10 +90,10 @@ auto sperr::coarsened_resolutions(dims_type vdim, dims_type cdim) -> std::vector auto nz = vdim[2] / cdim[2]; resolutions = sperr::coarsened_resolutions(cdim); - for (size_t i = 0; i < resolutions.size(); i++) { - resolutions[i][0] *= nx; - resolutions[i][1] *= ny; - resolutions[i][2] *= nz; + for (auto& resolution : resolutions) { + resolution[0] *= nx; + resolution[1] *= ny; + resolution[2] *= nz; } } @@ -333,8 +333,9 @@ auto sperr::write_n_bytes(std::string filename, size_t n_bytes, const void* buff return RTNType::Good; } -auto sperr::read_sections(std::string filename, const std::vector& sections, vec8_type& dst) - -> RTNType +auto sperr::read_sections(std::string filename, + const std::vector& sections, + vec8_type& dst) -> RTNType { // Calculate the farthest file location to be read. 
size_t far = 0; @@ -494,8 +495,10 @@ auto sperr::calc_stats(const T* arr1, const T* arr2, size_t arr_len, size_t omp_ return {rmse, linfty, psnr, arr1min, arr1max}; } template auto sperr::calc_stats(const float*, const float*, size_t, size_t) -> std::array; -template auto sperr::calc_stats(const double*, const double*, size_t, size_t) - -> std::array; +template auto sperr::calc_stats(const double*, + const double*, + size_t, + size_t) -> std::array; template auto sperr::kahan_summation(const T* arr, size_t len) -> T @@ -514,8 +517,8 @@ auto sperr::kahan_summation(const T* arr, size_t len) -> T template auto sperr::kahan_summation(const float*, size_t) -> float; template auto sperr::kahan_summation(const double*, size_t) -> double; -auto sperr::chunk_volume(dims_type vol_dim, dims_type chunk_dim) - -> std::vector> +auto sperr::chunk_volume(dims_type vol_dim, + dims_type chunk_dim) -> std::vector> { // Step 1: figure out how many segments are there along each axis. auto n_segs = std::array(); diff --git a/src/SPERR/test_scripts/CMakeLists.txt b/src/SPERR/test_scripts/CMakeLists.txt index 9e7d4f6f..0dbf6c50 100644 --- a/src/SPERR/test_scripts/CMakeLists.txt +++ b/src/SPERR/test_scripts/CMakeLists.txt @@ -1,29 +1,29 @@ add_executable( sperr_helper sperr_helper_unit_test.cpp ) -target_link_libraries( sperr_helper PUBLIC SPERR gtest_main ) +target_link_libraries( sperr_helper PUBLIC SPERR GTest::gtest_main ) add_executable( bitstream bitstream_unit_test.cpp ) -target_link_libraries( bitstream PUBLIC SPERR gtest_main ) +target_link_libraries( bitstream PUBLIC SPERR GTest::gtest_main ) add_executable( dwt dwt_unit_test.cpp ) -target_link_libraries( dwt PUBLIC SPERR gtest_main ) +target_link_libraries( dwt PUBLIC SPERR GTest::gtest_main ) add_executable( speck_int speck_int_unit_test.cpp ) -target_link_libraries( speck_int PUBLIC SPERR gtest_main ) +target_link_libraries( speck_int PUBLIC SPERR GTest::gtest_main ) add_executable( outlier_coder outlier_coder_unit_test.cpp ) 
-target_link_libraries( outlier_coder PUBLIC SPERR gtest_main ) +target_link_libraries( outlier_coder PUBLIC SPERR GTest::gtest_main ) add_executable( speck2d_flt speck2d_flt_unit_test.cpp ) -target_link_libraries( speck2d_flt PUBLIC SPERR gtest_main ) +target_link_libraries( speck2d_flt PUBLIC SPERR GTest::gtest_main ) add_executable( speck3d_flt speck3d_flt_unit_test.cpp ) -target_link_libraries( speck3d_flt PUBLIC SPERR gtest_main ) +target_link_libraries( speck3d_flt PUBLIC SPERR GTest::gtest_main ) add_executable( sperr3d_omp sperr3d_omp_unit_test.cpp ) -target_link_libraries( sperr3d_omp PUBLIC SPERR gtest_main ) +target_link_libraries( sperr3d_omp PUBLIC SPERR GTest::gtest_main ) add_executable( stream_tools stream_tools_unit_test.cpp ) -target_link_libraries( stream_tools PUBLIC SPERR gtest_main ) +target_link_libraries( stream_tools PUBLIC SPERR GTest::gtest_main ) include(GoogleTest) gtest_discover_tests( sperr_helper ) diff --git a/src/SPERR/test_scripts/bitstream_unit_test.cpp b/src/SPERR/test_scripts/bitstream_unit_test.cpp index 26b9b846..a9cfdc17 100644 --- a/src/SPERR/test_scripts/bitstream_unit_test.cpp +++ b/src/SPERR/test_scripts/bitstream_unit_test.cpp @@ -360,7 +360,68 @@ TEST(Bitmask, BufferTransfer) EXPECT_EQ(src.rbit(i), dst.rbit(i)); } -#if defined __cpp_lib_three_way_comparison && defined __cpp_impl_three_way_comparison +TEST(Bitmask, has_true) +{ + const size_t mask_size = 210; + + // Loop over all positions + for (size_t idx = 0; idx < mask_size; idx++) { + auto mask = Mask(mask_size); + mask.wtrue(idx); + + // Loop over all starting positions + for (size_t start = 0; start < mask_size; start++) { + + // Loop over all range length + for (size_t len = 0; len < mask_size - start; len++) { + auto ans1 = mask.has_true(start, len); + auto ans2 = -1l; + for (size_t i = start; i < start + len; i++) + if (mask.rbit(i)) { + ans2 = 1; + break; + } + EXPECT_EQ(ans1, ans2); + } + + } + } +} + +TEST(Bitmask, has_true_position) +{ + const size_t 
mask_size = 210; + + // Loop over all positions + for (size_t idx = 0; idx < mask_size; idx++) { + auto mask = Mask(mask_size); + mask.wtrue(idx); + + // Loop over all starting positions + for (size_t start = 0; start < mask_size; start++) { + + // Loop over all range length + for (size_t len = 0; len < mask_size - start; len++) { + auto ans1 = mask.has_true(start, len); + auto ans2 = -1l; + for (size_t i = start; i < start + len; i++) + if (mask.rbit(i)) { + ans2 = i - start; + break; + } + EXPECT_EQ(ans1, ans2) << "idx = " << idx << ", start = " << start << ", len = " << len << std::endl; + if (ans1 != ans2) + goto END_LABEL; + } + + } + } + +END_LABEL: + {} +} + +#if __cplusplus >= 201907L && defined __cpp_lib_three_way_comparison TEST(Bitmask, spaceship) { auto src = Mask(60); diff --git a/src/SPERR/utilities/double_prec.cpp b/src/SPERR/utilities/raw_tools/double_prec.cpp similarity index 100% rename from src/SPERR/utilities/double_prec.cpp rename to src/SPERR/utilities/raw_tools/double_prec.cpp diff --git a/src/SPERR/utilities/show_version.cpp b/src/SPERR/utilities/show_version.cpp index cd894588..d1e23358 100644 --- a/src/SPERR/utilities/show_version.cpp +++ b/src/SPERR/utilities/show_version.cpp @@ -8,4 +8,5 @@ int main() << SPERR_VERSION_PATCH << std::endl; std::cout << "Based on code Branch: " << SPERR_GIT_BRANCH << std::endl; std::cout << "Based on code SHA1 : " << SPERR_GIT_SHA1 << std::endl; + std::cout << "C++ Standard Support: " << __cplusplus << std::endl; } diff --git a/src/SPERR/utilities/sperr2d.cpp b/src/SPERR/utilities/sperr2d.cpp index aa557f91..89a7313b 100644 --- a/src/SPERR/utilities/sperr2d.cpp +++ b/src/SPERR/utilities/sperr2d.cpp @@ -130,6 +130,7 @@ int main(int argc, char* argv[]) // auto bitstream = std::string(); app.add_option("--bitstream", bitstream, "Output compressed bitstream.") + ->needs(cptr) ->group("Output settings"); auto decomp_f32 = std::string(); @@ -188,6 +189,10 @@ int main(int argc, char* argv[]) // // A little extra 
sanity check. // + if (input_file.empty()) { + std::cout << "What's the input file?" << std::endl; + return __LINE__; + } if (!cflag && !dflag) { std::cout << "Is this compressing (-c) or decompressing (-d) ?" << std::endl; return __LINE__; @@ -215,6 +220,13 @@ int main(int argc, char* argv[]) std::cout << "SPERR needs an output destination when decoding!" << std::endl; return __LINE__; } + // Print a warning message if there's no output specified + if (cflag && bitstream.empty()) + std::cout << "Warning: no output file provided. Consider using --bitstream option." + << std::endl; + if (dflag && decomp_f64.empty() && decomp_f32.empty() && decomp_lowres_f64.empty() && + decomp_lowres_f32.empty()) + std::cout << "Warning: no output file provided." << std::endl; // // Really starting the real work! @@ -228,7 +240,7 @@ int main(int argc, char* argv[]) if ((ftype == 32 && (total_vals * 4 != input.size())) || (ftype == 64 && (total_vals * 8 != input.size()))) { std::cout << "Input file size wrong!" << std::endl; - return __LINE__; + return __LINE__ % 256; } auto encoder = std::make_unique(); encoder->set_dims(dims); @@ -259,7 +271,7 @@ int main(int argc, char* argv[]) auto rtn = encoder->compress(); if (rtn != sperr::RTNType::Good) { std::cout << "Compression failed!" << std::endl; - return __LINE__; + return __LINE__ % 256; } // Assemble the output bitstream. diff --git a/src/SPERR/utilities/sperr3d.cpp b/src/SPERR/utilities/sperr3d.cpp index bb4ffc1d..b23bf66a 100644 --- a/src/SPERR/utilities/sperr3d.cpp +++ b/src/SPERR/utilities/sperr3d.cpp @@ -12,8 +12,9 @@ // This functions takes in a filename, and a full resolution. It then creates a list of // filenames, each has the coarsened resolution appended. 
-auto create_filenames(std::string name, sperr::dims_type vdims, sperr::dims_type cdims) - -> std::vector +auto create_filenames(std::string name, + sperr::dims_type vdims, + sperr::dims_type cdims) -> std::vector { auto filenames = std::vector(); auto resolutions = sperr::coarsened_resolutions(vdims, cdims); @@ -141,6 +142,7 @@ int main(int argc, char* argv[]) // auto bitstream = std::string(); app.add_option("--bitstream", bitstream, "Output compressed bitstream.") + ->needs(cptr) ->group("Output settings"); auto decomp_f32 = std::string(); @@ -205,6 +207,10 @@ int main(int argc, char* argv[]) // // A little extra sanity check. // + if (input_file.empty()) { + std::cout << "What's the input file?" << std::endl; + return __LINE__; + } if (!cflag && !dflag) { std::cout << "Is this compressing (-c) or decompressing (-d) ?" << std::endl; return __LINE__; @@ -232,6 +238,30 @@ int main(int argc, char* argv[]) std::cout << "SPERR needs an output destination when decoding!" << std::endl; return __LINE__; } + // Also check if the chunk dims can support multi-resolution decoding. + if (cflag && (!decomp_lowres_f64.empty() || !decomp_lowres_f32.empty())) { + auto name = decomp_lowres_f64; + if (name.empty()) + name = decomp_lowres_f32; + assert(!name.empty()); + auto filenames = create_filenames(name, dims, chunks); + if (filenames.empty()) { + std::printf( + " Warning: the combo of volume dimension (%lu, %lu, %lu) and chunk dimension" + " (%lu, %lu, %lu)\n cannot support multi-resolution decoding. " + " Try to use chunk dimensions that\n are similar in length and" + " can divide the volume dimension.\n", + dims[0], dims[1], dims[2], chunks[0], chunks[1], chunks[2]); + return __LINE__ % 256; + } + } + // Print a warning message if there's no output specified + if (cflag && bitstream.empty()) + std::cout << "Warning: no output file provided. Consider using --bitstream option." 
+ << std::endl; + if (dflag && decomp_f64.empty() && decomp_f32.empty() && decomp_lowres_f64.empty() && + decomp_lowres_f32.empty()) + std::cout << "Warning: no output file provided." << std::endl; // // Really starting the real work! @@ -242,7 +272,7 @@ int main(int argc, char* argv[]) if ((ftype == 32 && (total_vals * 4 != input.size())) || (ftype == 64 && (total_vals * 8 != input.size()))) { std::cout << "Input file size wrong!" << std::endl; - return __LINE__; + return __LINE__ % 256; } auto encoder = std::make_unique(); encoder->set_dims_and_chunks(dims, chunks);