diff --git a/doc/information.rst b/doc/information.rst
index 2894e8ac..3edace03 100644
--- a/doc/information.rst
+++ b/doc/information.rst
@@ -77,7 +77,7 @@ HDF5 compression filters and compression libraries sources were obtained from:
 * `SZ plugin <https://github.com/szcompressor/SZ>`_
   (commit `f466775 <https://github.com/szcompressor/SZ/tree/f4667759ead6a902110e80ff838ccdfddbc8dcd7>`_)
   using `SZ <https://github.com/szcompressor/SZ>`_, ZLib and ZStd.
-* `H5Z-SPERR plugin <https://github.com/NCAR/H5Z-SPERR>`_ (v0.1.3) using `SPERR <https://github.com/NCAR/SPERR>`_ (v0.8.1).
+* `H5Z-SPERR plugin <https://github.com/NCAR/H5Z-SPERR>`_ (v0.1.3) using `SPERR <https://github.com/NCAR/SPERR>`_ (v0.8.2).
 * `SZ3 plugin <https://github.com/szcompressor/SZ3>`_
   (commit `4bbe9df7e4bcb <https://github.com/szcompressor/SZ3/commit/4bbe9df7e4bcb6ae6339fcb3033100da07fe7434>`_)
   using `SZ3 <https://github.com/szcompressor/SZ3>`_ and ZStd.
diff --git a/src/SPERR/.github/workflows/clang-format.yml b/src/SPERR/.github/workflows/clang-format.yml
index 7b8b2393..8c19b1ea 100644
--- a/src/SPERR/.github/workflows/clang-format.yml
+++ b/src/SPERR/.github/workflows/clang-format.yml
@@ -22,9 +22,9 @@ jobs:
     steps:
     - uses: actions/checkout@v2
     - name: Run clang-format style check for C/C++ programs.
-      uses: jidicula/clang-format-action@v4.8.0
+      uses: jidicula/clang-format-action@v4.13.0
       with:
-        clang-format-version: '13'
+        clang-format-version: '18'
         check-path: ${{ matrix.path['check'] }}
         exclude-regex: ${{ matrix.path['exclude'] }}
         fallback-style: 'Chromium' # optional
diff --git a/src/SPERR/.github/workflows/clang-tidy.yml b/src/SPERR/.github/workflows/clang-tidy.yml.bkp
similarity index 96%
rename from src/SPERR/.github/workflows/clang-tidy.yml
rename to src/SPERR/.github/workflows/clang-tidy.yml.bkp
index e4423b3f..859d8a73 100644
--- a/src/SPERR/.github/workflows/clang-tidy.yml
+++ b/src/SPERR/.github/workflows/clang-tidy.yml.bkp
@@ -22,4 +22,4 @@ jobs:
         builddir: 'build'
         excludedirs: ''
         extensions: 'c,h,C,H,cpp,hpp,cc,hh,c++,h++,cxx,hxx'
-        cmakeoptions: '-DBUILD_CLI_UTILITIES=OFF -DBUILD_UNIT_TESTS=OFF -DUSE_ZSTD=OFF'
+        cmakeoptions: '-DBUILD_CLI_UTILITIES=OFF -DBUILD_UNIT_TESTS=OFF'
diff --git a/src/SPERR/CMakeLists.txt b/src/SPERR/CMakeLists.txt
index a48d03af..fb91e92d 100644
--- a/src/SPERR/CMakeLists.txt
+++ b/src/SPERR/CMakeLists.txt
@@ -2,14 +2,13 @@
 
 cmake_minimum_required(VERSION 3.14)
 
-project(SPERR VERSION 0.8.1 LANGUAGES CXX DESCRIPTION "Lossy Scientific Compression with SPERR")
+project(SPERR VERSION 0.8.2 LANGUAGES CXX DESCRIPTION "Lossy Scientific Compression with SPERR")
+
+if(NOT CMAKE_CXX_STANDARD)
+    set(CMAKE_CXX_STANDARD "20" CACHE STRING "Choose the C++ Standard to use." FORCE)
+    set_property(CACHE CMAKE_CXX_STANDARD PROPERTY STRINGS "20" "17")
+endif()
 
-#
-# specify the C++ standard
-# CMake will try to add a -std=c++20 flag if the compiler supports, but if it doesn't,
-# CMake will `decay` to a previous supported flag, e.g. -std=c++11.
-#
-set(CMAKE_CXX_STANDARD 20)
 if(NOT CMAKE_BUILD_TYPE)
     set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the type of build." FORCE)
     set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "RelWithDebInfo")
@@ -32,7 +31,7 @@ endif()
 option( BUILD_SHARED_LIBS "Build shared SPERR library" ON )
 option( BUILD_UNIT_TESTS "Build unit tests using GoogleTest" ON )
 option( BUILD_CLI_UTILITIES "Build a set of command line utilities" ON )
-option( USE_OMP "Use OpenMP parallelization on 3D volumes" ON )
+option( USE_OMP "Use OpenMP parallelization on 3D volumes" OFF )
 option( SPERR_PREFER_RPATH "Set RPATH; this can fight with package managers so turn off when building for them" ON )
 mark_as_advanced(FORCE SPERR_PREFER_RPATH)
 
@@ -109,8 +108,7 @@ if( BUILD_CLI_UTILITIES )
   set( CLI11_SINGLE_FILE OFF CACHE INTERNAL "Don't use single file CLI11")
   FetchContent_Declare( cli11
     GIT_REPOSITORY https://github.com/CLIUtils/CLI11
-    GIT_TAG        291c58789c031208f08f4f261a858b5b7083e8e2 # v2.3.2
-    PATCH_COMMAND patch -N CMakeLists.txt < ${CMAKE_SOURCE_DIR}/cli11.patch || true
+    GIT_TAG        6c7b07a878ad834957b98d0f9ce1dbe0cb204fc9 # v2.4.2
   )
   FetchContent_MakeAvailable(cli11)
 
diff --git a/src/SPERR/README.md b/src/SPERR/README.md
index 90ef7365..8564cd22 100644
--- a/src/SPERR/README.md
+++ b/src/SPERR/README.md
@@ -1,9 +1,6 @@
 [![clang-format](https://github.com/NCAR/SPERR/actions/workflows/clang-format.yml/badge.svg)](https://github.com/NCAR/SPERR/actions/workflows/clang-format.yml)
-[![clang-tidy](https://github.com/NCAR/SPERR/actions/workflows/clang-tidy.yml/badge.svg)](https://github.com/NCAR/SPERR/actions/workflows/clang-tidy.yml)
 [![unit-test](https://github.com/NCAR/SPERR/actions/workflows/unit-test.yml/badge.svg)](https://github.com/NCAR/SPERR/actions/workflows/unit-test.yml)
 [![CodeQL](https://github.com/NCAR/SPERR/actions/workflows/codeql-analysis.yml/badge.svg)](https://github.com/NCAR/SPERR/actions/workflows/codeql-analysis.yml)
-
-
 [![DOI](https://zenodo.org/badge/225491235.svg)](https://zenodo.org/badge/latestdoi/225491235)
 
 
@@ -20,6 +17,31 @@ This combination gives SPERR flexibility to compress targetting different qualit
 2) peak signal-to-noise ratio (PSNR), and 3) point-wise error (PWE).
 The name of SPERR stands for **SP**eck with **ERR**or bounding.
 
+## Quick Build
+SPERR requires 1) a working C++ compiler and 2) CMake tools to build. On a Unix-like system,
+the build steps are the following:
+
+```bash
+git clone https://github.com/NCAR/SPERR.git     # clone the repo
+mkdir SPERR/build                               # create the build directory
+cd SPERR/build                                  # enter the build directory
+cmake ..                                        # use cmake to configure the project
+cmake -DUSE_OMP=ON ..                           # Optional: enable OpenMP on 3D volumes.
+cmake -DCMAKE_INSTALL_PREFIX=/my/install/dir .. # Optional: specify a directory to install SPERR. The default is /usr/local .
+cmake -DCMAKE_CXX_STANDARD=17 ..                # Optional: use C++17 rather than C++20. The code is slightly faster with C++20.
+make -j 8                                       # build the project
+ctest .                                         # run unit tests, which should have 100% tests passed
+make install                                    # install the library and CLI tools to a specified directory.
+```
+
+## Plugin for HDF5
+SPERR is available as a *dynamically loaded plugin* for HDF5 with a registered ID of `32028`.
+This plugin is available at this [repo](https://github.com/NCAR/H5Z-SPERR).
+
+## Wrapper for Fortran
+A Fortran wrapper for SPERR has also been created by [ofmla](https://github.com/ofmla) 
+at this [repo](https://github.com/ofmla/fortran-sperr).
+
 ## Documentation
 
 SPERR documentation is hosted on Github [Wiki](https://github.com/NCAR/SPERR/wiki) pages. To get started, one might want to
@@ -45,5 +67,6 @@ If SPERR benefits your work, please kindly cite [this publication](https://ieeex
 (Author's copy is available [here](https://vast.ucar.edu/pdfs/SPERR_IPDPS.pdf).)
 
 ## Presentations
+- FZ Workshop Hands-on: Feb 15 2024, Sarasota, FL. ([handout and examples](https://vast.ucar.edu/pdfs/Li_FZ2024.pdf))
 - SC'23 Tutorial on lossy scientific data compression: Nov 13 2023, Denver CO. ([slides](https://vast.ucar.edu/pdfs/Li_SC23_Slides.pdf))
 - IPDPS'23 Lossy Scientific Data Compression With SPERR: May 18 2023, St. Petersburg, FL. ([slides](https://vast.ucar.edu/pdfs/Li_IPDPS23_Slides.pdf))
diff --git a/src/SPERR/cli11.patch b/src/SPERR/cli11.patch
deleted file mode 100644
index 3edf079b..00000000
--- a/src/SPERR/cli11.patch
+++ /dev/null
@@ -1,4 +0,0 @@
-1c1
-< cmake_minimum_required(VERSION 3.4)
----
-> cmake_minimum_required(VERSION 3.14)
diff --git a/src/SPERR/handout/compile.txt b/src/SPERR/handout/compile.txt
new file mode 100644
index 00000000..32c9199b
--- /dev/null
+++ b/src/SPERR/handout/compile.txt
@@ -0,0 +1 @@
+xelatex -shell-escape main.tex
diff --git a/src/SPERR/handout/main.tex b/src/SPERR/handout/main.tex
new file mode 100644
index 00000000..d5a7c4ba
--- /dev/null
+++ b/src/SPERR/handout/main.tex
@@ -0,0 +1,568 @@
+\documentclass{article}
+\usepackage{graphicx} % Required for inserting images
+\usepackage[colorlinks]{hyperref}
+\usepackage[margin=1in]{geometry}
+\usepackage[many]{tcolorbox}    	% for COLORED BOXES (tikz and xcolor included)
+\usepackage{minted}
+\usepackage{xspace}
+\setcounter{tocdepth}{1}
+\setminted{linenos}
+\setlength\parindent{0pt}   % killing indentation for all the text
+\setlength\columnsep{0.25in} % setting length of column separator
+\pagestyle{empty}           % setting pagestyle to be empty
+\definecolor{main}{HTML}{5989cf}    % setting main color to be used
+\definecolor{sub}{HTML}{cde4ff}     % setting sub color to be used
+\tcbset{
+    sharp corners,
+    colback = white,
+    before skip = 0.2cm,    % add extra space before the box
+    after skip = 0.5cm      % add extra space after the box
+}                           % setting global options for tcolorbox
+\newtcolorbox{BlueBox}{
+    sharpish corners, % better drop shadow
+    colback = sub, 
+    colframe = main, 
+    boxrule = 0pt, 
+    toprule = 4.5pt, % top rule weight
+    enhanced,
+    fuzzy shadow = {0pt}{-2pt}{-0.5pt}{0.5pt}{black!35} % {xshift}{yshift}{offset}{step}{options} 
+}
+\newcommand{\callout}[1]{\begin{BlueBox}#1\end{BlueBox}}
+
+
+
+\title{SPERR Interface and Examples}
+\author{Samuel Li}
+\date{\today}
+
+\begin{document}
+
+\vspace{-1cm}
+\maketitle
+
+\vspace{-1cm}
+
+\section{Introduction}
+This document is the SPERR section of the handout for the 
+\href{https://szcompressor.org/next.szcompressor.github.io/meetings/feb24fl/}{FZ workshop} 
+hands-on session on February 15 2024, Sarasota, FL.
+It provides examples for the CLI interface, C++ interface, and C interface of SPERR
+(Sections~\ref{sec:cli}, \ref{sec:cpp}, and \ref{sec:c}, respectively).
+
+\section{SPERR}
+Contact: \href{mailto:shaomeng@ucar.edu}{Samuel Li (shaomeng@ucar.edu)} \\
+Repo: \href{https://github.com/NCAR/SPERR/}{github.com/NCAR/SPERR/} \\
+Wiki: \href{https://github.com/NCAR/SPERR/wiki}{github.com/NCAR/SPERR/wiki}
+
+\vspace{2mm}
+SPERR uses \textit{wavelet transforms} to decorrelate the data, encodes the quantized 
+coefficients, and explicitly corrects any data point exceeding a prescribed point-wise error
+(PWE) tolerance.
+Most often, SPERR produces the smallest compressed bitstream 
+honoring a PWE tolerance.
+
+A SPERR bitstream can be used to reconstruct the data fields 
+in two additional fashions: \textit{flexible-rate} decoding and
+\textit{multi-resolution} decoding.
+
+\vspace{-2mm}
+\begin{itemize}
+\item \textit{Flexible-rate} decoding: any prefix of a SPERR bitstream 
+(i.e., a sub-bitstream that starts from the very beginning)
+produced by a simple truncation is still valid to reconstruct the data 
+field, though at a lower quality.
+This ability is useful for applications such as tiered storage and data sharing 
+over slow connections, to name a few.
+\vspace{-2mm}
+\item \textit{Multi-resolution} decoding: a hierarchy of the data field
+with coarsened resolutions can be obtained together with the 
+native resolution.
+This ability is useful for quick analyses with limited resources.
+\end{itemize}
+
+\callout{On a Unix-like system with a working C++ compiler and CMake, SPERR can be
+built from source and made available to users in just a few commands.
+See this \href{https://github.com/NCAR/SPERR?tab=readme-ov-file\#quick-build}{README} 
+for details.}
+
+
+\subsection{CLI Interface}
+\label{sec:cli}
+Upon a successful build, four CLI utility programs are placed in the \texttt{./bin/}
+directory; three of them are most relevant for end users: \texttt{sperr2d}, 
+\texttt{sperr3d}, and \texttt{sperr3d\_trunc};
+each of them can be invoked with the \texttt{-h} option to display a help message.
+
+\subsubsection{\texttt{sperr2d}}
+\texttt{sperr2d} is responsible for compressing and decompressing a 2D data slice.
+In compression mode (\texttt{-c}), it takes as input a raw binary file consisting of 
+single- or double-precision floating point values, and outputs a compressed bitstream.
+In decompression mode (\texttt{-d}), it takes as input a compressed bitstream, and
+outputs the decompressed binary file of single- or double-precision floating point values.
+Its help message contains all the options \texttt{sperr2d} takes:
+
+\begin{minted}{bash}
+$ ./bin/sperr2d -h
+
+Usage: ./bin/sperr2d [OPTIONS] [filename]
+
+Positionals:
+  filename TEXT:FILE          A data slice to be compressed, or
+                              a bitstream to be decompressed.
+
+Options:
+  -h,--help                   Print this help message and exit
+
+Execution settings:
+  -c Excludes: -d             Perform a compression task.
+  -d Excludes: -c             Perform a decompression task.
+
+Input properties:
+  --ftype UINT                Specify the input float type in bits. Must be 32 or 64.
+  --dims [UINT,UINT]          Dimensions of the input slice. E.g., `--dims 128 128`
+                              (The fastest-varying dimension appears first.)
+
+Output settings:
+  --bitstream TEXT            Output compressed bitstream.
+  --decomp_f TEXT             Output decompressed slice in f32 precision.
+  --decomp_d TEXT             Output decompressed slice in f64 precision.
+  --decomp_lowres_f TEXT      Output lower resolutions of the decompressed slice in f32 precision.
+  --decomp_lowres_d TEXT      Output lower resolutions of the decompressed slice in f64 precision.
+  --print_stats Needs: -c     Show statistics measuring the compression quality.
+
+Compression settings (choose one):
+  --pwe FLOAT Excludes: --psnr --bpp
+                              Maximum point-wise error (PWE) tolerance.
+  --psnr FLOAT Excludes: --pwe --bpp
+                              Target PSNR to achieve.
+  --bpp FLOAT:FLOAT in [0 - 64] Excludes: --pwe --psnr
+                              Target bit-per-pixel (bpp) to achieve.
+\end{minted}
+
+Examples:
+\begin{enumerate}
+\item Compress a 2D slice in $512\times512$ dimension, single-precision floats
+      with a PWE tolerance of $10^{-2}$: \\ 
+      \texttt{./bin/sperr2d -c --ftype 32 --dims 512 512  --pwe 1e-2 
+              \textbackslash \\
+               --bitstream ./out.stream ./in.f32}
+
+\item Perform the compression task described above, plus also write out the 
+      compress-decompressed slice, and finally print statistics
+      measuring the compression quality: \\
+      \texttt{./bin/sperr2d -c --ftype 32 --dims 512 512  --pwe 1e-2
+               \textbackslash \\
+               --decomp\_f ./out.decomp --print\_stats
+               --bitstream ./out.stream ./in.f32}
+
+\item Decompress from a SPERR bitstream, and write out the slice in native 
+      and coarsened resolutions: \\
+      \texttt{./bin/sperr2d -d --decomp\_f ./out.decomp --decomp\_lowres\_f ./out.lowres 
+              ./sperr.stream} \\
+      In this example, the output file \texttt{out.decomp} will be of the native resolution, 
+      and six other files (\texttt{out.lowres.256x256, out.lowres.128x128, etc.}) 
+      will also be produced with their filenames indicating the coarsened resolution.
+\end{enumerate}
+
+\subsubsection{\texttt{sperr3d}}
+\label{sec:sperr3d}
+\texttt{sperr3d} is responsible for compressing and decompressing a 3D data volume.
+Similar to \texttt{sperr2d}, it operates in either compression (\texttt{-c}) or 
+decompression (\texttt{-d}) mode, converting between raw binary floating-point values 
+and compressed bitstreams. 
+Different from \texttt{sperr2d} which compresses the input 2D slice as a whole, 
+\texttt{sperr3d} divides an input 3D volume into smaller chunks, and then compresses
+each chunk individually.
+This chunking step allows for compressing and decompressing all the small chunks 
+individually and in parallel.
+\texttt{sperr3d} uses $256^3$ as the default chunk size, but any number from dozens to
+low hundreds would work well.
+(Chunk dimensions that can divide the full volume are preferred, but not mandatory.)
+Command line options \texttt{--chunks} and \texttt{--omp} control the chunking and 
+parallel execution behavior, respectively.
+The help message of \texttt{sperr3d} details all the options it takes:
+
+\begin{minted}{bash}
+$ ./bin/sperr3d -h
+
+Usage: ./bin/sperr3d [OPTIONS] [filename]
+
+Positionals:
+  filename TEXT:FILE          A data volume to be compressed, or
+                              a bitstream to be decompressed.
+
+Options:
+  -h,--help                   Print this help message and exit
+
+Execution settings:
+  -c Excludes: -d             Perform a compression task.
+  -d Excludes: -c             Perform a decompression task.
+  --omp UINT                  Number of OpenMP threads to use. Default (or 0) to use all.
+
+Input properties (for compression):
+  --ftype UINT                Specify the input float type in bits. Must be 32 or 64.
+  --dims [UINT,UINT,UINT]     Dimensions of the input volume. E.g., `--dims 128 128 128`
+                              (The fastest-varying dimension appears first.)
+
+Output settings:
+  --bitstream TEXT            Output compressed bitstream.
+  --decomp_f TEXT             Output decompressed volume in f32 precision.
+  --decomp_d TEXT             Output decompressed volume in f64 precision.
+  --decomp_lowres_f TEXT      Output lower resolutions of the decompressed volume in f32 precision.
+  --decomp_lowres_d TEXT      Output lower resolutions of the decompressed volume in f64 precision.
+  --print_stats Needs: -c     Print statistics measuring the compression quality.
+
+Compression settings:
+  --chunks [UINT,UINT,UINT]   Dimensions of the preferred chunk size. Default: 256 256 256
+                              (Volume dims don't need to be divisible by these chunk dims.)
+  --pwe FLOAT Excludes: --psnr --bpp
+                              Maximum point-wise error (PWE) tolerance.
+  --psnr FLOAT Excludes: --pwe --bpp
+                              Target PSNR to achieve.
+  --bpp FLOAT:FLOAT in [0 - 64] Excludes: --pwe --psnr
+                              Target bit-per-pixel (bpp) to achieve.
+\end{minted}
+
+Examples:
+\begin{enumerate}
+\item Compress a 3D volume in $384\times384\times256$ dimension, double-precision floats,
+      using a PWE tolerance of $10^{-9}$ and chunks of $192\times192\times256$: \\
+      \texttt{./bin/sperr3d -c --omp 4 --ftype 64 --dims 384 384 256 --chunks 192 192 256 
+      \textbackslash \\
+      --pwe 1e-9 --bitstream ./out.stream ./in.f64}
+\item Perform the compression task described above, plus also write out the 
+      compress-decompressed volume, and finally print statistics
+      measuring the compression quality: \\
+       \texttt{./bin/sperr3d -c --omp 4 --ftype 64 --dims 384 384 256 --chunks 192 192 256 
+       \textbackslash \\
+       --pwe 1e-9 --decomp\_d ./out.decomp --print\_stats --bitstream ./out.stream ./in.f64}
+\item Decompress from a SPERR bitstream, and write out the volume in native 
+      and coarsened resolutions: \\
+      \texttt{./bin/sperr3d -d --decomp\_d ./out.decomp --decomp\_lowres\_d ./out.lowres 
+              ./sperr.stream} \\
+      In this example, the output file \texttt{out.decomp} will be of the native resolution, 
+      and five other files (\texttt{out.lowres.192x192x128, out.lowres.96x96x64, etc.}) 
+      will also be produced with their filenames indicating the coarsened resolution.
+\end{enumerate}
+
+\callout{To support multi-resolution decoding in 3D cases, the individual chunks 
+(\texttt{--chunks}) need to 1) approximate a cube, so that there are the same number
+of wavelet transforms performed on each dimension, and 2) divide the full volume
+in each dimension.}
+
+\subsubsection{\texttt{sperr3d\_trunc} and Flexible-Rate Decoding}
+Compressed SPERR bitstreams support \textit{flexible-rate} decoding, meaning that a
+sub-bitstream of it from the beginning can still be used to reconstruct the data field.
+Equally important, the reconstruction will have the best possible quality (in terms of
+average error) under the size constraint of the sub-bitstream, though lower quality
+than using the full bitstream.
+Given a compressed bitstream, a sub-bitstream can be obtained by a simple truncation,
+for example, a truncation that keeps the first 10\% of the full bitstream.
+%On Unix systems, utility tool \texttt{head} can easily perform this task.
+
+The chunking scheme used in 3D compression (see Section~\ref{sec:sperr3d}) 
+brings some complication,
+because the bitstream representing each chunk needs to be truncated \textit{individually}.
+\texttt{sperr3d\_trunc} is thus introduced to properly truncate a multi-chunked SPERR bitstream.
+Specifically, it 1) locates the bitstream representing each chunk,
+2) truncates the bitstream, and 3) stitches all truncated bitstreams together so
+\texttt{sperr3d} can properly decompress it.
+
+The help message of \texttt{sperr3d\_trunc} details its options:
+
+\callout{SPERR bitstreams without using multi-chunks (i.e., \texttt{--dims} equals 
+\texttt{--chunks} in 3D, and all 2D cases) can safely be truncated by any method.
+For example, the following command truncates a compressed bitstream \texttt{field.stream}
+to keep its first 5kB as \texttt{field2.stream}, which is also recognized by SPERR:\\
+\texttt{head -c 5000 field.stream > field2.stream}
+}
+
+\begin{minted}{bash}
+$ ./bin/sperr3d_trunc -h
+
+Usage: ./bin/sperr3d_trunc [OPTIONS] [filename]
+
+Positionals:
+  filename TEXT:FILE          The original SPERR3D bitstream to be truncated.
+
+Options:
+  -h,--help                   Print this help message and exit
+
+Truncation settings:
+  --pct UINT REQUIRED         Percentage (1--100) of the original bitstream to truncate.
+  --omp UINT                  Number of OpenMP threads to use. Default (or 0) to use all.
+
+Output settings:
+  -o TEXT                     Write out the truncated bitstream.
+
+Input settings:
+  --orig32 TEXT               Original raw data in 32-bit precision to calculate compression
+                              quality using the truncated bitstream.
+  --orig64 TEXT               Original raw data in 64-bit precision to calculate compression
+                              quality using the truncated bitstream.
+\end{minted}
+
+Examples:
+\begin{enumerate}
+\item Produce a truncated version of a bitstream using 10\% of the original length: \\
+      \texttt{./bin/sperr3d\_trunc --pct 10 -o ./stream.10 ./bitstream}
+\item Perform the task above, plus evaluate compression quality using the truncated
+      bitstream:\\
+      \texttt{./bin/sperr3d\_trunc --pct 10 -o ./stream.10 --orig64 ./data.f64 ./bitstream}
+\end{enumerate}
+
+\subsection{C++ Interface}
+\label{sec:cpp}
+\subsubsection{2D Compression and Decompression}
+C++ class \texttt{sperr::SPECK2D\_FLT} is responsible for 2D compression and decompression.
+The sample code walks through necessary steps to perform a compression and decompression
+task, and a more concrete example can be found
+\href{https://github.com/NCAR/SPERR/blob/main/utilities/sperr2d.cpp}{here}.
+
+\begin{minted}{cpp}
+//
+// Example of using a sperr::SPECK2D_FLT() to compress a 2D slice.
+// This is a 6-step process.
+//
+#include "SPECK2D_FLT.h"
+
+// Step 1: create an encoder:
+auto encoder = sperr::SPECK2D_FLT();
+
+// Step 2: specify the 2D slice dimension (the third dimension is left with 1):
+encoder.set_dims({128, 128, 1});
+
+// Step 3: copy over the input data from a raw pointer (float* or double*):
+encoder.copy_data(ptr, 16'384);      // 16,384 is the number of values.
+// Step 3 alternative: one can hand a memory buffer to the encoder to avoid a memory copy;
+// either version works.
+encoder.take_data(std::move(input)); // input is of type std::vector<double>.
+
+// Step 4: specify the compression quality measured in one of three metrics;
+// only the last invoked quality metric is honored.
+encoder.set_tolerance(1e-9);         // PWE tolerance = 1e-9
+encoder.set_bitrate(2.2);            // Target bitrate = 2.2
+encoder.set_psnr(102.2);             // Target PSNR = 102.2
+
+// Step 5: perform the compression task:
+encoder.compress();
+
+// Step 6: retrieve the compressed bitstream:
+auto bitstream = std::vector<uint8_t>();
+encoder.append_encoded_bitstream(bitstream);
+\end{minted}
+
+\begin{minted}{cpp}
+//
+// Example of using a sperr::SPECK2D_FLT() to decompress a bitstream.
+// This is a 5-step process.
+//
+#include "SPECK2D_FLT.h"
+
+// Step 1: create a decoder:
+auto decoder = sperr::SPECK2D_FLT();
+
+// Step 2: specify the 2D slice dimension (the third dimension is left with 1):
+// This information is often saved once somewhere for many same-sized slices.
+decoder.set_dims({128, 128, 1});
+
+// Step 3: pass in the compressed bitstream as a raw pointer (uint8_t*):
+decoder.use_bitstream(ptr, 16'384);  // 16,384 is the length of the bitstream.
+
+// Step 4: perform the decompression task:
+decoder.decompress(multi_res);  // a boolean, if to enable multi-resolution decoding
+
+// Step 5: retrieve the decompressed slice:
+std::vector<double> vol = decoder.view_decoded_data();
+auto hierarchy = decoder.view_hierarchy();     // if multi-resolution was enabled
+// Step 5 alternative: one can take ownership of the data buffer to avoid a memory copy.
+std::vector<double> vol = decoder.release_decoded_data();
+auto hierarchy = decoder.release_hierarchy();  // if multi-resolution was enabled
+\end{minted}
+
+\subsubsection{3D Compression and Decompression}
+C++ class \texttt{sperr::SPERR3D\_OMP\_C} is responsible for 3D compression, and 
+\texttt{sperr::SPERR3D\_OMP\_D} is responsible for 3D decompression.
+The sample code walks through necessary steps to perform a compression and decompression
+task, and a more concrete example can be found
+\href{https://github.com/NCAR/SPERR/blob/main/utilities/sperr3d.cpp}{here}.
+
+\begin{minted}{cpp}
+//
+// Example of using a sperr::SPERR3D_OMP_C() to compress a 3D volume.
+// This is a 6-step process.
+//
+#include "SPERR3D_OMP_C.h"
+
+// Step 1: create an encoder:
+auto encoder = sperr::SPERR3D_OMP_C();
+
+// Step 2: specify the volume and chunk dimensions, respectively:
+encoder.set_dims_and_chunks({384, 384, 256}, {192, 192, 128});
+
+// Step 3: specify the number of OpenMP threads to use:
+encoder.set_num_threads(4);
+
+// Step 4: specify the compression quality measured in one of three metrics;
+// only the last invoked quality metric is honored.
+encoder.set_tolerance(1e-9);         // PWE tolerance = 1e-9
+encoder.set_bitrate(2.2);            // Target bitrate = 2.2
+encoder.set_psnr(102.2);             // Target PSNR = 102.2
+
+// Step 5: perform the compression task:
+// The input data is passed in as a raw pointer (float* or double*),
+// and the total number of values will be passed in here too.
+encoder.compress(ptr, 384 * 384 * 256);
+
+// Step 6: retrieve the compressed bitstream:
+std::vector<uint8_t> stream = encoder.get_encoded_bitstream();
+\end{minted}
+
+\begin{minted}{cpp}
+//
+// Example of using a sperr::SPERR3D_OMP_D() to decompress a bitstream.
+// This is a 5-step process.
+//
+#include "SPERR3D_OMP_D.h"
+
+// Step 1: create a decoder:
+auto decoder = sperr::SPERR3D_OMP_D();
+
+// Step 2: specify the number of OpenMP threads to use:
+decoder.set_num_threads(4);
+
+// Step 3: pass in the compressed bitstream as a raw pointer (uint8_t*):
+decoder.use_bitstream(ptr, 16'384);  // 16,384 is the length of the bitstream.
+
+// Step 4: perform the decompression task:
+// Note that the pointer to the bitstream is passed in again!
+decoder.decompress(ptr, multi_res);  // a boolean, if to enable multi-resolution decoding
+
+// Step 5: retrieve the decompressed volume:
+auto [dimx, dimy, dimz] = decoder.get_dims();  // dimension of the volume
+std::vector<double> vol = decoder.view_decoded_data();
+auto hierarchy = decoder.view_hierarchy();     // if multi-resolution was enabled
+// Step 5 alternative: one can take ownership of the data buffer to avoid memory copies.
+std::vector<double> vol = decoder.release_decoded_data();
+auto hierarchy = decoder.release_hierarchy();  // if multi-resolution was enabled
+\end{minted}
+
+\callout{To achieve higher performance with repeated compression and 
+         decompression tasks, the encoder and decoder objects should be
+         re-used rather than repeatedly destroyed and created.}
+
+\subsection{C Interface}
+\label{sec:c}
+SPERR provides a C wrapper with a set of C functions. 
+All of the C interface is in the header file
+\href{https://github.com/NCAR/SPERR/blob/main/include/SPERR_C_API.h}{\texttt{SPERR\_C\_API.h}},
+which itself documents the C functions and parameters, etc.
+The following example code walks through key steps to use the C API to perform
+compression and decompression, while more concrete examples are available
+for \href{https://github.com/NCAR/SPERR/blob/main/examples/C_API/2d.c}{2D}
+and \href{https://github.com/NCAR/SPERR/blob/main/examples/C_API/3d.c}{3D} cases.
+
+\subsubsection{Example: 2D}
+
+\begin{minted}{C}
+/*
+ * Example of using the SPERR C API to perform 2D compression and decompression tasks.
+ */
+#include "SPERR_C_API.h"
+
+/* Step 1: create variables to keep the output: */
+void* stream = NULL;  /* caller is responsible for free'ing it after use. */
+size_t stream_len = 0;
+
+/* Step 2: call the 2D compression function:
+ * Assume that we have a buffer of 128 * 128 floats (in float* type) to be compressed, 
+ * using PWE tolerance = 1e-3.
+ */
+int ret = sperr_comp_2d(ptr,           /* memory buffer containing the input */
+                        1,             /* the input is of type float; 0 means double. */
+                        128,           /* dimx */
+                        128,           /* dimy */
+                        3,             /* compression mode; 3 means fixed PWE */
+                        1e-3,          /* actual PWE tolerance */
+                        0,             /* not using a header for the output bitstream */
+                        &stream,       /* will hold the compressed bitstream */
+                        &stream_len);  /* length of the compressed bitstream */
+assert(ret == 0);
+
+/* 
+ * Now that the 2D compression is completed, one can decompress the bitstream to
+ * retrieve the raw values, as the rest of this example shows.
+ */
+
+/* Step 3: create a pointer to hold the decompressed values: */
+void* output = NULL;  /* caller is responsible for free'ing it after use. */
+
+/* Step 4: call the 2D decompression function: */
+int ret2 = sperr_decomp_2d(stream,      /* compressed bitstream */
+                           stream_len,  /* compressed bitstream length */
+                           1,           /* decompress to floats. 0 means to doubles. */
+                           128,         /* dimx */
+                           128,         /* dimy */
+                           &output);    /* decompressed data is stored here */
+assert(ret2 == 0);
+free(output);   /* cleanup */
+free(stream);   /* cleanup */
+\end{minted}
+
+\subsubsection{Example: 3D}
+
+\begin{minted}{C}
+/*
+ * Example of using the SPERR C API to perform 3D compression and decompression tasks.
+ */
+#include "SPERR_C_API.h"
+
+/* Step 1: create variables to keep the output: */
+void* stream = NULL;  /* caller is responsible for free'ing it after use. */
+size_t stream_len = 0;
+
+/* Step 2: call the 3D compression function:
+ * Assume that we have a buffer of 256^3 floats (in float* type) to be compressed,
+ * using PWE tolerance = 1e-3 and chunk dimension of 128^3.
+ */
+int ret = sperr_comp_3d(ptr,           /* memory buffer containing the input */
+                        1,             /* the input is of type float; 0 means double. */
+                        256,           /* dimx */
+                        256,           /* dimy */
+                        256,           /* dimz */
+                        128,           /* chunk_x */
+                        128,           /* chunk_y */
+                        128,           /* chunk_z */
+                        3,             /* compression mode; 3 means fixed PWE */
+                        1e-3,          /* actual PWE tolerance */
+                        4,             /* use 4 OpenMP threads */
+                        &stream,       /* will hold the compressed bitstream */
+                        &stream_len);  /* length of the compressed bitstream */
+assert(ret == 0);
+
+/* 
+ * Now that the 3D compression is completed, one can decompress the bitstream to
+ * retrieve the raw values, as the rest of this example shows.
+ */
+
+/* Step 3: create a pointer to hold the decompressed values,
+ * and also variables to hold the volume dimensions.
+ */
+void* output = NULL;  /* caller is responsible for free'ing it after use. */
+size_t dimx = 0, dimy = 0, dimz = 0;
+
+/* Step 4: call the 3D decompression function: */
+int ret2 = sperr_decomp_3d(stream,      /* compressed bitstream */
+                           stream_len,  /* compressed bitstream length */
+                           1,           /* decompress to floats. 0 means to doubles. */
+                           4,           /* use 4 OpenMP threads */
+                           &dimx,       /* dimx of the decompressed volume */
+                           &dimy,       /* dimy of the decompressed volume */
+                           &dimz,       /* dimz of the decompressed volume */
+                           &output);    /* decompressed data is stored here */
+assert(ret2 == 0);
+free(output);   /* cleanup */
+free(stream);   /* cleanup */
+\end{minted}
+
+\end{document}
diff --git a/src/SPERR/include/Bitmask.h b/src/SPERR/include/Bitmask.h
index 3805d083..052f8a67 100644
--- a/src/SPERR/include/Bitmask.h
+++ b/src/SPERR/include/Bitmask.h
@@ -37,16 +37,24 @@ class Bitmask {
   //
   auto rlong(size_t idx) const -> uint64_t;  // `idx` of the bit, not the long.
   auto rbit(size_t idx) const -> bool;
+
+  // Functions to perform bulk tests.
+  //
+  // Two versions of `has_true()` test bits [start, start + len); both return -1 if none is true.
+  //   1) Position == false: it returns 1 to indicate that a true bit was found.
+  //   2) Position == true:  it returns the offset, relative to `start`, of the first true bit.
+  template <bool Position>
+  auto has_true(size_t start, size_t len) const -> int64_t;
   auto count_true() const -> size_t;  // How many 1's in this mask?
 
   // Functions for write
   //
-  void wlong(size_t idx, uint64_t value);  // `idx` of the bit, not the long.
   void wbit(size_t idx, bool bit);
-  void wtrue(size_t idx);   // This is faster than `wbit(idx, true)`.
-  void wfalse(size_t idx);  // This is faster than `wbit(idx, false)`.
-  void reset();             // Set the current bitmask to be all 0's.
-  void reset_true();        // Set the current bitmask to be all 1's.
+  void wlong(size_t idx, uint64_t value);  // `idx` of the bit, not the long.
+  void wtrue(size_t idx);                  // This is faster than `wbit(idx, true)`.
+  void wfalse(size_t idx);                 // This is faster than `wbit(idx, false)`.
+  void reset();                            // Set the current bitmask to be all 0's.
+  void reset_true();                       // Set the current bitmask to be all 1's.
 
   // Functions for direct access of the underlying data buffer
   // Note: `use_bitstream()` reads the number of values (uint64_t type) that provide
@@ -55,7 +63,7 @@ class Bitmask {
   auto view_buffer() const -> const std::vector<uint64_t>&;
   void use_bitstream(const void* p);
 
-#if defined __cpp_lib_three_way_comparison && defined __cpp_impl_three_way_comparison
+#if __cplusplus >= 202002L && defined __cpp_lib_three_way_comparison
   auto operator<=>(const Bitmask& rhs) const noexcept;
   auto operator==(const Bitmask& rhs) const noexcept -> bool;
 #endif
diff --git a/src/SPERR/include/SPECK3D_INT.h b/src/SPERR/include/SPECK3D_INT.h
index 3859d960..881f98d5 100644
--- a/src/SPERR/include/SPECK3D_INT.h
+++ b/src/SPERR/include/SPECK3D_INT.h
@@ -62,7 +62,7 @@ class SPECK3D_INT : public SPECK_INT<T> {
   virtual void m_process_S(size_t idx1, size_t idx2, size_t& counter, bool) = 0;
   virtual void m_process_P(size_t i, size_t m, size_t& c, bool) = 0;  // Called by `m_code_S()`.
   virtual void m_process_P_lite(size_t idx) = 0;  // Called by `m_sorting_pass()` directly.
-  virtual void m_additional_initialization() = 0;
+  virtual void m_additional_initialization() {};  // empty by default
 
   void m_code_S(size_t idx1, size_t idx2);
   auto m_partition_S_XYZ(Set3D, uint16_t) const -> std::tuple<std::array<Set3D, 8>, uint16_t>;
diff --git a/src/SPERR/include/SPECK3D_INT_DEC.h b/src/SPERR/include/SPECK3D_INT_DEC.h
index 6c7f1973..3ccb6799 100644
--- a/src/SPERR/include/SPECK3D_INT_DEC.h
+++ b/src/SPERR/include/SPECK3D_INT_DEC.h
@@ -24,7 +24,6 @@ class SPECK3D_INT_DEC final : public SPECK3D_INT<T> {
   void m_process_S(size_t idx1, size_t idx2, size_t& counter, bool read) final;
   void m_process_P(size_t idx, size_t no_use, size_t& counter, bool read) final;
   void m_process_P_lite(size_t idx) final;
-  void m_additional_initialization() final{};  // empty function
 };
 
 };  // namespace sperr
diff --git a/src/SPERR/include/SPECK3D_INT_ENC.h b/src/SPERR/include/SPECK3D_INT_ENC.h
index 2b73fa67..2ccb25e2 100644
--- a/src/SPERR/include/SPECK3D_INT_ENC.h
+++ b/src/SPERR/include/SPECK3D_INT_ENC.h
@@ -3,8 +3,6 @@
 
 #include "SPECK3D_INT.h"
 
-#include <optional>
-
 namespace sperr {
 
 //
diff --git a/src/SPERR/include/SPECK_INT.h b/src/SPERR/include/SPECK_INT.h
index 412d8e6c..1abd2a8a 100644
--- a/src/SPERR/include/SPECK_INT.h
+++ b/src/SPERR/include/SPECK_INT.h
@@ -44,7 +44,7 @@ class SPECK_INT {
   // Note: `speck_int_get_num_bitplanes()` is provided as a free-standing helper function (above).
   //
   // Retrieve the number of useful bits of a SPECK bitstream from its header.
-  auto get_speck_bits(const void*) const -> uint64_t;
+  auto get_speck_num_bits(const void*) const -> uint64_t;
   // Retrieve the number of bytes of a SPECK bitstream (including header) from its header.
   auto get_stream_full_len(const void*) const -> uint64_t;
 
diff --git a/src/SPERR/include/SPERR3D_OMP_D.h b/src/SPERR/include/SPERR3D_OMP_D.h
index 81117676..6b7d56b1 100644
--- a/src/SPERR/include/SPERR3D_OMP_D.h
+++ b/src/SPERR/include/SPERR3D_OMP_D.h
@@ -24,6 +24,7 @@ class SPERR3D_OMP_D {
   auto decompress(const void* bitstream, bool multi_res = false) -> RTNType;
 
   auto view_decoded_data() const -> const sperr::vecd_type&;
+  auto view_hierarchy() const -> const std::vector<vecd_type>&;
   auto release_decoded_data() -> sperr::vecd_type&&;
   auto release_hierarchy() -> std::vector<vecd_type>&&;
 
diff --git a/src/SPERR/include/SPERR3D_Stream_Tools.h b/src/SPERR/include/SPERR3D_Stream_Tools.h
index 0fb03779..11cb65ae 100644
--- a/src/SPERR/include/SPERR3D_Stream_Tools.h
+++ b/src/SPERR/include/SPERR3D_Stream_Tools.h
@@ -36,7 +36,7 @@ class SPERR3D_Stream_Tools {
 
   // Function that reads in portions of a file only to facilitate progressive access.
   // (This function does not read the whole file.)
-  auto progressive_read(std::string filename, unsigned pct) const -> vec8_type;
+  auto progressive_read(const std::string& filename, unsigned pct) const -> vec8_type;
 
   // Function that truncates a bitstream in the memory to facilitate progressive access.
   //    Note on `stream_len`: it does not need to be the full length of the original bitstream,
@@ -56,8 +56,9 @@ class SPERR3D_Stream_Tools {
   // Given the header of a bitstream and a desired percentage to truncate, return an
   //    updated header and a list of {offset, len} to access.
   //    Note: this function assumes that the header is complete.
-  auto m_progressive_helper(const void* header_buf, size_t buf_len, unsigned pct) const
-      -> std::tuple<vec8_type, std::vector<size_t>>;
+  auto m_progressive_helper(const void* header_buf,
+                            size_t buf_len,
+                            unsigned pct) const -> std::tuple<vec8_type, std::vector<size_t>>;
 };
 
 }  // End of namespace sperr
diff --git a/src/SPERR/include/sperr_helper.h b/src/SPERR/include/sperr_helper.h
index b15f5734..befca0b0 100644
--- a/src/SPERR/include/sperr_helper.h
+++ b/src/SPERR/include/sperr_helper.h
@@ -37,7 +37,7 @@ using dims_type = std::array<size_t, 3>;
 //
 // Helper classes
 //
-enum class SigType : unsigned char { Insig, Sig, NewlySig, Dunno, Garbage };
+enum class SigType : unsigned char { Insig, Sig, Dunno, Garbage };
 
 enum class UINTType : unsigned char { UINT8, UINT16, UINT32, UINT64 };
 
@@ -136,8 +136,9 @@ auto read_whole_file(std::string filename) -> vec_type<T>;
 // Read sections of a file (extract sections from a memory buffer), and append those sections
 //    to the end of `dst`. The read from file version avoids reading not-requested sections.
 //    The sections are defined by pairs of offsets and lengths, both in number of bytes.
-auto read_sections(std::string filename, const std::vector<size_t>& sections, vec8_type& dst)
-    -> RTNType;
+auto read_sections(std::string filename,
+                   const std::vector<size_t>& sections,
+                   vec8_type& dst) -> RTNType;
 auto extract_sections(const void* buf,
                       size_t buf_len,
                       const std::vector<size_t>& sections,
diff --git a/src/SPERR/src/Bitmask.cpp b/src/SPERR/src/Bitmask.cpp
index 036274e3..cebf6cd9 100644
--- a/src/SPERR/src/Bitmask.cpp
+++ b/src/SPERR/src/Bitmask.cpp
@@ -1,13 +1,18 @@
 #include "Bitmask.h"
 
 #include <algorithm>
+#include <cassert>
 #include <limits>
 
+#if __cplusplus >= 202002L
+#include <bit>
+#endif
+
 sperr::Bitmask::Bitmask(size_t nbits)
 {
   if (nbits > 0) {
     auto num_longs = nbits / 64;
-    if (nbits % 64 != 0)
+    if (nbits - num_longs * 64 != 0)
       num_longs++;
     m_buf.assign(num_longs, 0);
     m_num_bits = nbits;
@@ -22,7 +27,7 @@ auto sperr::Bitmask::size() const -> size_t
 void sperr::Bitmask::resize(size_t nbits)
 {
   auto num_longs = nbits / 64;
-  if (nbits % 64 != 0)
+  if (nbits - num_longs * 64 != 0)
     num_longs++;
   m_buf.resize(num_longs, 0);
   m_num_bits = nbits;
@@ -35,11 +40,81 @@ auto sperr::Bitmask::rlong(size_t idx) const -> uint64_t
 
 auto sperr::Bitmask::rbit(size_t idx) const -> bool
 {
-  auto word = m_buf[idx / 64];
-  word &= uint64_t{1} << (idx % 64);
+  auto div = idx / 64;         // index of the 64-bit word that holds bit `idx`
+  auto rem = idx - div * 64;   // position of the bit inside that word; equals `idx % 64`
+  auto word = m_buf[div];      // local copy of the word; `m_buf` itself is not modified
+  word &= uint64_t{1} << rem;  // keep only the requested bit
   return (word != 0);
 }
 
+template <bool Position>  // true: locate the first set bit; false: only test for one
+auto sperr::Bitmask::has_true(size_t start, size_t len) const -> int64_t
+{
+  auto long_idx = start / 64;        // index of the 64-bit word containing bit `start`
+  auto processed_bits = int64_t{0};  // bits examined so far, i.e. offset relative to `start`
+  auto word = m_buf[long_idx];       // NOTE: read unconditionally, even when `len` is 0
+  auto answer = uint64_t{0};         // accumulates every set bit seen in the current word
+
+  // Phase 1: scan from `start` to the end of its word, or to `start + len` if that comes first.
+  auto begin_idx = start - long_idx * 64;              // bit position of `start` in its word
+  auto nbits = std::min(size_t{64}, begin_idx + len);  // one past the last bit to test here
+  for (auto i = begin_idx; i < nbits; i++) {
+    answer |= word & (uint64_t{1} << i);
+    if constexpr (Position) {
+      if (answer != 0)
+        return processed_bits;  // offset of the first set bit
+    }
+    processed_bits++;
+  }
+  if constexpr (!Position) {
+    if (answer != 0)
+      return 1;
+  }
+
+  // Phase 2: test whole 64-bit words while at least 64 requested bits remain.
+  while (processed_bits + 64 <= len) {
+    word = m_buf[++long_idx];
+    if (word) {
+      if constexpr (Position) {
+#if __cplusplus >= 202002L
+        int64_t i = std::countr_zero(word);  // position of the lowest set bit in `word`
+        return processed_bits + i;
+#else
+        for (int64_t i = 0; i < 64; i++)  // always returns: `word` is known to be non-zero
+          if (word & (uint64_t{1} << i))
+            return processed_bits + i;
+#endif
+      }
+      else
+        return 1;
+    }
+    processed_bits += 64;
+  }
+
+  // Phase 3: scan the low bits of the next word that are still inside the requested range.
+  if (processed_bits < len) {
+    nbits = len - processed_bits;
+    assert(nbits < 64);  // holds because the loop above exits only when fewer than 64 remain
+    word = m_buf[++long_idx];
+    answer = 0;
+    for (int64_t i = 0; i < nbits; i++) {
+      answer |= word & (uint64_t{1} << i);
+      if constexpr (Position) {
+        if (answer != 0)
+          return processed_bits + i;
+      }
+    }
+    if constexpr (!Position) {
+      if (answer != 0)
+        return 1;
+    }
+  }
+
+  return -1;  // no set bit within `len` bits starting at `start`
+}
+template auto sperr::Bitmask::has_true<true>(size_t, size_t) const -> int64_t;
+template auto sperr::Bitmask::has_true<false>(size_t, size_t) const -> int64_t;
+
 auto sperr::Bitmask::count_true() const -> size_t
 {
   size_t counter = 0;
@@ -48,11 +123,15 @@ auto sperr::Bitmask::count_true() const -> size_t
 
   // Note that unused bits in the last long are not guaranteed to be all 0's.
   for (size_t i = 0; i < m_buf.size() - 1; i++) {
-    const auto val = m_buf[i];
+    auto val = m_buf[i];
+#if __cplusplus >= 202002L
+    counter += std::popcount(val);
+#else
     if (val != 0) {
       for (size_t j = 0; j < 64; j++)
         counter += ((val >> j) & uint64_t{1});
     }
+#endif
   }
   const auto val = m_buf.back();
   if (val != 0) {
@@ -71,7 +150,7 @@ void sperr::Bitmask::wlong(size_t idx, uint64_t value)
 void sperr::Bitmask::wbit(size_t idx, bool bit)
 {
   const auto wstart = idx / 64;
-  const auto mask = uint64_t{1} << (idx % 64);
+  const auto mask = uint64_t{1} << (idx - wstart * 64);
 
   auto word = m_buf[wstart];
   if (bit)
@@ -84,7 +163,7 @@ void sperr::Bitmask::wbit(size_t idx, bool bit)
 void sperr::Bitmask::wtrue(size_t idx)
 {
   const auto wstart = idx / 64;
-  const auto mask = uint64_t{1} << (idx % 64);
+  const auto mask = uint64_t{1} << (idx - wstart * 64);
 
   auto word = m_buf[wstart];
   word |= mask;
@@ -94,7 +173,7 @@ void sperr::Bitmask::wtrue(size_t idx)
 void sperr::Bitmask::wfalse(size_t idx)
 {
   const auto wstart = idx / 64;
-  const auto mask = uint64_t{1} << (idx % 64);
+  const auto mask = uint64_t{1} << (idx - wstart * 64);
 
   auto word = m_buf[wstart];
   word &= ~mask;
@@ -122,7 +201,7 @@ void sperr::Bitmask::use_bitstream(const void* p)
   std::copy(pu64, pu64 + m_buf.size(), m_buf.begin());
 }
 
-#if defined __cpp_lib_three_way_comparison && defined __cpp_impl_three_way_comparison
+#if __cplusplus >= 202002L && defined __cpp_lib_three_way_comparison
 auto sperr::Bitmask::operator<=>(const Bitmask& rhs) const noexcept
 {
   auto cmp = m_num_bits <=> rhs.m_num_bits;
diff --git a/src/SPERR/src/Bitstream.cpp b/src/SPERR/src/Bitstream.cpp
index 909985f9..cabff558 100644
--- a/src/SPERR/src/Bitstream.cpp
+++ b/src/SPERR/src/Bitstream.cpp
@@ -29,7 +29,7 @@ void sperr::Bitstream::reserve(size_t nbits)
   if (nbits > m_buf.size() * 64) {
     // Number of longs that's absolutely needed.
     auto num_longs = nbits / 64;
-    if (nbits % 64 != 0)
+    if (num_longs * 64 < nbits)
       num_longs++;
 
     const auto dist = std::distance(m_buf.begin(), m_itr);
@@ -54,8 +54,9 @@ auto sperr::Bitstream::rtell() const -> size_t
 
 void sperr::Bitstream::rseek(size_t offset)
 {
-  m_itr = m_buf.begin() + offset / 64;
-  const auto rem = offset % 64;
+  size_t div = offset / 64;
+  size_t rem = offset - div * 64;
+  m_itr = m_buf.begin() + div;
   if (rem) {
     m_buffer = *m_itr >> rem;
     ++m_itr;
@@ -90,8 +91,9 @@ auto sperr::Bitstream::wtell() const -> size_t
 
 void sperr::Bitstream::wseek(size_t offset)
 {
-  m_itr = m_buf.begin() + offset / 64;
-  const auto rem = offset % 64;
+  size_t div = offset / 64;
+  size_t rem = offset - div * 64;
+  m_itr = m_buf.begin() + div;
   if (rem) {
     m_buffer = *m_itr;
     m_buffer &= (uint64_t{1} << rem) - 1;
@@ -114,7 +116,7 @@ void sperr::Bitstream::wbit(bool bit)
 #endif
   {
     if (m_itr == m_buf.end()) {  // allocate memory if necessary.
-      const auto dist = m_buf.size();
+      auto dist = m_buf.size();
       m_buf.resize(std::max(size_t{1}, dist) * 2 - dist / 2);  // use a growth factor of 1.5
       m_itr = m_buf.begin() + dist;
     }
@@ -129,7 +131,7 @@ void sperr::Bitstream::flush()
 {
   if (m_bits) {  // only really flush when there are remaining bits.
     if (m_itr == m_buf.end()) {
-      const auto dist = m_buf.size();
+      auto dist = m_buf.size();
       m_buf.resize(std::max(size_t{1}, dist) * 2 - dist / 2);  // use a growth factor of 1.5
       m_itr = m_buf.begin() + dist;
     }
@@ -144,6 +146,7 @@ void sperr::Bitstream::flush()
 void sperr::Bitstream::write_bitstream(void* p, size_t num_bits) const
 {
   assert(num_bits <= m_buf.size() * 64);
+
   const auto num_longs = num_bits / 64;
   auto rem_bytes = num_bits / 8 - num_longs * sizeof(uint64_t);
   if (num_bits % 8 != 0)
@@ -162,8 +165,9 @@ void sperr::Bitstream::write_bitstream(void* p, size_t num_bits) const
 auto sperr::Bitstream::get_bitstream(size_t num_bits) const -> std::vector<std::byte>
 {
   assert(num_bits <= m_buf.size() * 64);
+
   auto num_bytes = num_bits / 8;
-  if (num_bits % 8 != 0)
+  if (num_bits - num_bytes * 8 != 0)
     num_bytes++;
 
   auto tmp = std::vector<std::byte>(num_bytes);
diff --git a/src/SPERR/src/Conditioner.cpp b/src/SPERR/src/Conditioner.cpp
index 1031f4d7..8d637554 100644
--- a/src/SPERR/src/Conditioner.cpp
+++ b/src/SPERR/src/Conditioner.cpp
@@ -63,8 +63,9 @@ auto sperr::Conditioner::condition(vecd_type& buf, dims_type dims) -> condi_type
   return header;
 }
 
-auto sperr::Conditioner::inverse_condition(vecd_type& buf, dims_type dims, condi_type header)
-    -> RTNType
+auto sperr::Conditioner::inverse_condition(vecd_type& buf,
+                                           dims_type dims,
+                                           condi_type header) -> RTNType
 {
   // unpack meta bit fields
   auto meta = sperr::unpack_8_booleans(header[0]);
diff --git a/src/SPERR/src/SPECK1D_INT_DEC.cpp b/src/SPERR/src/SPECK1D_INT_DEC.cpp
index 8fefd5be..8d73e0e1 100644
--- a/src/SPERR/src/SPECK1D_INT_DEC.cpp
+++ b/src/SPERR/src/SPECK1D_INT_DEC.cpp
@@ -5,6 +5,10 @@
 #include <cstring>  // std::memcpy()
 #include <numeric>
 
+#if __cplusplus >= 202002L
+#include <bit>
+#endif
+
 template <typename T>
 void sperr::SPECK1D_INT_DEC<T>::m_sorting_pass()
 {
@@ -13,7 +17,15 @@ void sperr::SPECK1D_INT_DEC<T>::m_sorting_pass()
   const auto bits_x64 = m_LIP_mask.size() - m_LIP_mask.size() % 64;
 
   for (size_t i = 0; i < bits_x64; i += 64) {
-    const auto value = m_LIP_mask.rlong(i);
+    auto value = m_LIP_mask.rlong(i);
+
+#if __cplusplus >= 202002L
+    while (value) {
+      size_t j = std::countr_zero(value);
+      m_process_P(i + j, j, true);
+      value &= value - 1;
+    }
+#else
     if (value != 0) {
       for (size_t j = 0; j < 64; j++) {
         if ((value >> j) & uint64_t{1}) {
@@ -22,6 +34,7 @@ void sperr::SPECK1D_INT_DEC<T>::m_sorting_pass()
         }
       }
     }
+#endif
   }
   for (auto i = bits_x64; i < m_LIP_mask.size(); i++) {
     if (m_LIP_mask.rbit(i)) {
diff --git a/src/SPERR/src/SPECK1D_INT_ENC.cpp b/src/SPERR/src/SPECK1D_INT_ENC.cpp
index a8c36dc8..45373b04 100644
--- a/src/SPERR/src/SPECK1D_INT_ENC.cpp
+++ b/src/SPERR/src/SPECK1D_INT_ENC.cpp
@@ -5,6 +5,10 @@
 #include <cstring>  // std::memcpy()
 #include <numeric>
 
+#if __cplusplus >= 202002L
+#include <bit>
+#endif
+
 template <typename T>
 void sperr::SPECK1D_INT_ENC<T>::m_sorting_pass()
 {
@@ -13,7 +17,15 @@ void sperr::SPECK1D_INT_ENC<T>::m_sorting_pass()
   const auto bits_x64 = m_LIP_mask.size() - m_LIP_mask.size() % 64;
 
   for (size_t i = 0; i < bits_x64; i += 64) {
-    const auto value = m_LIP_mask.rlong(i);
+    auto value = m_LIP_mask.rlong(i);
+
+#if __cplusplus >= 202002L
+    while (value) {
+      size_t j = std::countr_zero(value);
+      m_process_P(i + j, SigType::Dunno, j, true);
+      value &= value - 1;
+    }
+#else
     if (value != 0) {
       for (size_t j = 0; j < 64; j++) {
         if ((value >> j) & uint64_t{1}) {
@@ -22,6 +34,7 @@ void sperr::SPECK1D_INT_ENC<T>::m_sorting_pass()
         }
       }
     }
+#endif
   }
   for (auto i = bits_x64; i < m_LIP_mask.size(); i++) {
     if (m_LIP_mask.rbit(i)) {
@@ -49,9 +62,6 @@ void sperr::SPECK1D_INT_ENC<T>::m_process_S(size_t idx1,
                                             size_t& counter,
                                             bool output)
 {
-  // Significance type cannot be NewlySig!
-  assert(sig != SigType::NewlySig);
-
   auto& set = m_LIS[idx1][idx2];
 
   // Strategy to decide the significance of this set;
@@ -88,7 +98,6 @@ template <typename T>
 void sperr::SPECK1D_INT_ENC<T>::m_process_P(size_t idx, SigType sig, size_t& counter, bool output)
 {
   // Decide the significance of this pixel
-  assert(sig != SigType::NewlySig);
   bool is_sig = false;
   if (sig == SigType::Dunno)
     is_sig = (m_coeff_buf[idx] >= m_threshold);
diff --git a/src/SPERR/src/SPECK2D_INT.cpp b/src/SPERR/src/SPECK2D_INT.cpp
index 077f3bdf..fdb69e3e 100644
--- a/src/SPERR/src/SPECK2D_INT.cpp
+++ b/src/SPERR/src/SPECK2D_INT.cpp
@@ -3,6 +3,10 @@
 #include <algorithm>
 #include <cassert>
 
+#if __cplusplus >= 202002L
+#include <bit>
+#endif
+
 template <typename T>
 void sperr::SPECK2D_INT<T>::m_sorting_pass()
 {
@@ -11,7 +15,15 @@ void sperr::SPECK2D_INT<T>::m_sorting_pass()
   const auto bits_x64 = m_LIP_mask.size() - m_LIP_mask.size() % 64;
 
   for (size_t i = 0; i < bits_x64; i += 64) {
-    const auto value = m_LIP_mask.rlong(i);
+    auto value = m_LIP_mask.rlong(i);
+
+#if __cplusplus >= 202002L
+    while (value) {
+      size_t j = std::countr_zero(value);
+      m_process_P(i + j, j, true);
+      value &= value - 1;
+    }
+#else
     if (value != 0) {
       for (size_t j = 0; j < 64; j++) {
         if ((value >> j) & uint64_t{1}) {
@@ -20,6 +32,7 @@ void sperr::SPECK2D_INT<T>::m_sorting_pass()
         }
       }
     }
+#endif
   }
   for (auto i = bits_x64; i < m_LIP_mask.size(); i++) {
     if (m_LIP_mask.rbit(i)) {
diff --git a/src/SPERR/src/SPECK3D_INT.cpp b/src/SPERR/src/SPECK3D_INT.cpp
index 694c4c13..763db790 100644
--- a/src/SPERR/src/SPECK3D_INT.cpp
+++ b/src/SPERR/src/SPECK3D_INT.cpp
@@ -5,6 +5,10 @@
 #include <cstring>
 #include <numeric>
 
+#if __cplusplus >= 202002L
+#include <bit>
+#endif
+
 template <typename T>
 void sperr::SPECK3D_INT<T>::m_clean_LIS()
 {
@@ -100,13 +104,22 @@ void sperr::SPECK3D_INT<T>::m_sorting_pass()
   const auto bits_x64 = m_LIP_mask.size() - m_LIP_mask.size() % 64;
 
   for (size_t i = 0; i < bits_x64; i += 64) {
-    const auto value = m_LIP_mask.rlong(i);
+    auto value = m_LIP_mask.rlong(i);
+
+#if __cplusplus >= 202002L
+    while (value) {
+      auto j = std::countr_zero(value);
+      m_process_P_lite(i + j);
+      value &= value - 1;
+    }
+#else
     if (value != 0) {
       for (size_t j = 0; j < 64; j++) {
         if ((value >> j) & uint64_t{1})
           m_process_P_lite(i + j);
       }
     }
+#endif
   }
   for (auto i = bits_x64; i < m_LIP_mask.size(); i++) {
     if (m_LIP_mask.rbit(i))
@@ -127,27 +140,73 @@ void sperr::SPECK3D_INT<T>::m_sorting_pass()
 template <typename T>
 void sperr::SPECK3D_INT<T>::m_code_S(size_t idx1, size_t idx2)
 {
-  auto [subsets, next_lev] = m_partition_S_XYZ(m_LIS[idx1][idx2], uint16_t(idx1));
-
-  // Since some subsets could be empty, let's put empty sets at the end.
-  const auto set_end =
-      std::remove_if(subsets.begin(), subsets.end(), [](auto& s) { return s.num_elem() == 0; });
-
-  // Counter for the number of discovered significant sets.
-  //    If no significant subset is found yet, and we're already looking at the last subset,
-  //    then we know that this last subset IS significant.
-  size_t sig_counter = 0;
-  for (auto it = subsets.begin(); it != set_end; ++it) {
-    bool need_decide = (sig_counter != 0 || it + 1 != set_end);
-    if (it->num_elem() == 1) {
-      auto idx = it->start_z * m_dims[0] * m_dims[1] + it->start_y * m_dims[0] + it->start_x;
-      m_LIP_mask.wtrue(idx);
-      m_process_P(idx, it->get_morton(), sig_counter, need_decide);
-    }
-    else {
-      m_LIS[next_lev].emplace_back(*it);
-      const auto newidx2 = m_LIS[next_lev].size() - 1;
-      m_process_S(next_lev, newidx2, sig_counter, need_decide);
+  auto set = m_LIS[idx1][idx2];  // work on a copy of the set
+
+  if (set.length_x == 2 && set.length_y == 2 && set.length_z == 2) {  // tail elision (2x2x2) case
+    size_t sig_counter = 0;
+    bool need_decide = true;
+
+    // Element (0, 0, 0): `id` is the linear index of the set's first element.
+    const auto id = set.start_z * m_dims[0] * m_dims[1] + set.start_y * m_dims[0] + set.start_x;
+    auto mort = set.get_morton();  // advanced by one for each subsequent element
+    m_LIP_mask.wtrue(id);
+    m_process_P(id, mort, sig_counter, need_decide);
+
+    // Element (1, 0, 0)
+    auto id2 = id + 1;
+    m_LIP_mask.wtrue(id2);
+    m_process_P(id2, ++mort, sig_counter, need_decide);
+
+    // Element (0, 1, 0)
+    id2 = id + m_dims[0];  // one step in y
+    m_LIP_mask.wtrue(id2);
+    m_process_P(id2, ++mort, sig_counter, need_decide);
+
+    // Element (1, 1, 0)
+    m_LIP_mask.wtrue(++id2);
+    m_process_P(id2, ++mort, sig_counter, need_decide);
+
+    // Element (0, 0, 1)
+    id2 = id + m_dims[0] * m_dims[1];  // one step in z
+    m_LIP_mask.wtrue(id2);
+    m_process_P(id2, ++mort, sig_counter, need_decide);
+
+    // Element (1, 0, 1)
+    m_LIP_mask.wtrue(++id2);
+    m_process_P(id2, ++mort, sig_counter, need_decide);
+
+    // Element (0, 1, 1)
+    id2 = id + m_dims[0] * (m_dims[1] + 1);  // one step in both y and z
+    m_LIP_mask.wtrue(id2);
+    m_process_P(id2, ++mort, sig_counter, need_decide);
+
+    // Element (1, 1, 1): if none of the previous 7 was significant, this one is known to be.
+    need_decide = sig_counter != 0;
+    m_LIP_mask.wtrue(++id2);
+    m_process_P(id2, ++mort, sig_counter, need_decide);
+  }
+  else {  // normal recursion case
+          // Get its 8 subsets, and move the empty ones to the end.
+    auto [subsets, next_lev] = m_partition_S_XYZ(set, uint16_t(idx1));
+    const auto set_end =
+        std::remove_if(subsets.begin(), subsets.end(), [](auto& s) { return s.num_elem() == 0; });
+
+    // Counter for the number of discovered significant sets.
+    //    If no significant subset is found yet, and we're already looking at the last subset,
+    //    then we know that this last subset IS significant.
+    size_t sig_counter = 0;
+    for (auto it = subsets.begin(); it != set_end; ++it) {
+      bool need_decide = (sig_counter != 0 || it + 1 != set_end);
+      if (it->num_elem() == 1) {
+        auto idx = it->start_z * m_dims[0] * m_dims[1] + it->start_y * m_dims[0] + it->start_x;
+        m_LIP_mask.wtrue(idx);
+        m_process_P(idx, it->get_morton(), sig_counter, need_decide);
+      }
+      else {
+        m_LIS[next_lev].emplace_back(*it);
+        const auto newidx2 = m_LIS[next_lev].size() - 1;
+        m_process_S(next_lev, newidx2, sig_counter, need_decide);
+      }
     }
   }
 }
@@ -171,15 +230,13 @@ auto sperr::SPECK3D_INT<T>::m_partition_S_XYZ(Set3D set, uint16_t lev) const
 
   auto subsets = std::tuple<std::array<Set3D, 8>, uint16_t>();
   std::get<1>(subsets) = lev;
-  constexpr auto offsets = std::array<size_t, 3>{1, 2, 4};
   auto morton_offset = set.get_morton();
 
   //
   // The actual figuring out where it starts/ends part...
   //
   // subset (0, 0, 0)
-  constexpr auto idx0 = 0 * offsets[0] + 0 * offsets[1] + 0 * offsets[2];
-  auto& sub0 = std::get<0>(subsets)[idx0];
+  auto& sub0 = std::get<0>(subsets)[0];
   sub0.set_morton(morton_offset);
   sub0.start_x = set.start_x;
   sub0.start_y = set.start_y;
@@ -189,8 +246,7 @@ auto sperr::SPECK3D_INT<T>::m_partition_S_XYZ(Set3D set, uint16_t lev) const
   sub0.length_z = split_z[0];
 
   // subset (1, 0, 0)
-  constexpr auto idx1 = 1 * offsets[0] + 0 * offsets[1] + 0 * offsets[2];
-  auto& sub1 = std::get<0>(subsets)[idx1];
+  auto& sub1 = std::get<0>(subsets)[1];
   morton_offset += sub0.num_elem();
   sub1.set_morton(morton_offset);
   sub1.start_x = set.start_x + split_x[0];
@@ -201,8 +257,7 @@ auto sperr::SPECK3D_INT<T>::m_partition_S_XYZ(Set3D set, uint16_t lev) const
   sub1.length_z = split_z[0];
 
   // subset (0, 1, 0)
-  constexpr auto idx2 = 0 * offsets[0] + 1 * offsets[1] + 0 * offsets[2];
-  auto& sub2 = std::get<0>(subsets)[idx2];
+  auto& sub2 = std::get<0>(subsets)[2];
   morton_offset += sub1.num_elem();
   sub2.set_morton(morton_offset);
   sub2.start_x = set.start_x;
@@ -213,8 +268,7 @@ auto sperr::SPECK3D_INT<T>::m_partition_S_XYZ(Set3D set, uint16_t lev) const
   sub2.length_z = split_z[0];
 
   // subset (1, 1, 0)
-  constexpr auto idx3 = 1 * offsets[0] + 1 * offsets[1] + 0 * offsets[2];
-  auto& sub3 = std::get<0>(subsets)[idx3];
+  auto& sub3 = std::get<0>(subsets)[3];
   morton_offset += sub2.num_elem();
   sub3.set_morton(morton_offset);
   sub3.start_x = set.start_x + split_x[0];
@@ -225,8 +279,7 @@ auto sperr::SPECK3D_INT<T>::m_partition_S_XYZ(Set3D set, uint16_t lev) const
   sub3.length_z = split_z[0];
 
   // subset (0, 0, 1)
-  constexpr auto idx4 = 0 * offsets[0] + 0 * offsets[1] + 1 * offsets[2];
-  auto& sub4 = std::get<0>(subsets)[idx4];
+  auto& sub4 = std::get<0>(subsets)[4];
   morton_offset += sub3.num_elem();
   sub4.set_morton(morton_offset);
   sub4.start_x = set.start_x;
@@ -237,8 +290,7 @@ auto sperr::SPECK3D_INT<T>::m_partition_S_XYZ(Set3D set, uint16_t lev) const
   sub4.length_z = split_z[1];
 
   // subset (1, 0, 1)
-  constexpr auto idx5 = 1 * offsets[0] + 0 * offsets[1] + 1 * offsets[2];
-  auto& sub5 = std::get<0>(subsets)[idx5];
+  auto& sub5 = std::get<0>(subsets)[5];
   morton_offset += sub4.num_elem();
   sub5.set_morton(morton_offset);
   sub5.start_x = set.start_x + split_x[0];
@@ -249,8 +301,7 @@ auto sperr::SPECK3D_INT<T>::m_partition_S_XYZ(Set3D set, uint16_t lev) const
   sub5.length_z = split_z[1];
 
   // subset (0, 1, 1)
-  constexpr auto idx6 = 0 * offsets[0] + 1 * offsets[1] + 1 * offsets[2];
-  auto& sub6 = std::get<0>(subsets)[idx6];
+  auto& sub6 = std::get<0>(subsets)[6];
   morton_offset += sub5.num_elem();
   sub6.set_morton(morton_offset);
   sub6.start_x = set.start_x;
@@ -261,8 +312,7 @@ auto sperr::SPECK3D_INT<T>::m_partition_S_XYZ(Set3D set, uint16_t lev) const
   sub6.length_z = split_z[1];
 
   // subset (1, 1, 1)
-  constexpr auto idx7 = 1 * offsets[0] + 1 * offsets[1] + 1 * offsets[2];
-  auto& sub7 = std::get<0>(subsets)[idx7];
+  auto& sub7 = std::get<0>(subsets)[7];
   morton_offset += sub6.num_elem();
   sub7.set_morton(morton_offset);
   sub7.start_x = set.start_x + split_x[0];
diff --git a/src/SPERR/src/SPECK_INT.cpp b/src/SPERR/src/SPECK_INT.cpp
index f1f4dc9e..fa04a877 100644
--- a/src/SPERR/src/SPECK_INT.cpp
+++ b/src/SPERR/src/SPECK_INT.cpp
@@ -5,6 +5,10 @@
 #include <cstring>
 #include <numeric>
 
+#if __cplusplus >= 202002L
+#include <bit>
+#endif
+
 //
 // Free-standing helper function
 //
@@ -54,7 +58,7 @@ void sperr::SPECK_INT<T>::set_budget(size_t bud)
 }
 
 template <typename T>
-auto sperr::SPECK_INT<T>::get_speck_bits(const void* buf) const -> uint64_t
+auto sperr::SPECK_INT<T>::get_speck_num_bits(const void* buf) const -> uint64_t
 {
   // Given the header definition, directly retrieve the value stored in bytes 1--9.
   const auto* const ptr = static_cast<const uint8_t*>(buf);
@@ -66,7 +70,7 @@ auto sperr::SPECK_INT<T>::get_speck_bits(const void* buf) const -> uint64_t
 template <typename T>
 auto sperr::SPECK_INT<T>::get_stream_full_len(const void* buf) const -> uint64_t
 {
-  auto num_bits = get_speck_bits(buf);
+  auto num_bits = get_speck_num_bits(buf);
   while (num_bits % 8 != 0)
     ++num_bits;
   return (header_size + num_bits / 8);
@@ -311,7 +315,17 @@ void sperr::SPECK_INT<T>::m_refinement_pass_encode()
   const auto bits_x64 = m_LSP_mask.size() - m_LSP_mask.size() % 64;
 
   for (size_t i = 0; i < bits_x64; i += 64) {  // Evaluate 64 bits at a time.
-    const auto value = m_LSP_mask.rlong(i);
+    auto value = m_LSP_mask.rlong(i);
+
+#if __cplusplus >= 202002L
+    while (value) {
+      auto j = std::countr_zero(value);
+      const bool o1 = m_coeff_buf[i + j] >= m_threshold;
+      m_coeff_buf[i + j] -= tmp1[o1];
+      m_bit_buffer.wbit(o1);
+      value &= value - 1;
+    }
+#else
     if (value != 0) {
       for (size_t j = 0; j < 64; j++) {
         if ((value >> j) & uint64_t{1}) {
@@ -321,6 +335,7 @@ void sperr::SPECK_INT<T>::m_refinement_pass_encode()
         }
       }
     }
+#endif
   }
   for (auto i = bits_x64; i < m_LSP_mask.size(); i++) {  // Evaluate the remaining bits.
     if (m_LSP_mask.rbit(i)) {
@@ -357,7 +372,20 @@ void sperr::SPECK_INT<T>::m_refinement_pass_decode()
   if (m_threshold >= uint_type{2}) {                                 // <-- Point 1
     const auto half_t = m_threshold / uint_type{2};
     for (size_t i = 0; i < bits_x64; i += 64) {  // <-- Point 2
-      const auto value = m_LSP_mask.rlong(i);
+      auto value = m_LSP_mask.rlong(i);
+
+#if __cplusplus >= 202002L
+      while (value) {
+        auto j = std::countr_zero(value);
+        if (m_bit_buffer.rbit())
+          m_coeff_buf[i + j] += half_t;
+        else
+          m_coeff_buf[i + j] -= half_t;
+        if (++read_pos == m_avail_bits)              // <-- Point 3
+          goto INITIALIZE_NEWLY_FOUND_POINTS_LABEL;  // <-- Point 4
+        value &= value - 1;
+      }
+#else
       if (value != 0) {
         for (size_t j = 0; j < 64; j++) {
           if ((value >> j) & uint64_t{1}) {
@@ -370,6 +398,7 @@ void sperr::SPECK_INT<T>::m_refinement_pass_decode()
           }
         }
       }
+#endif
     }
     for (auto i = bits_x64; i < m_LSP_mask.size(); i++) {  // <-- Point 2
       if (m_LSP_mask.rbit(i)) {
@@ -381,10 +410,21 @@ void sperr::SPECK_INT<T>::m_refinement_pass_decode()
           goto INITIALIZE_NEWLY_FOUND_POINTS_LABEL;  // <-- Point 4
       }
     }
-  }       // Finish the case where `m_threshold >= 2`.
+  }  // Finish the case where `m_threshold >= 2`.
   else {  // Start the case where `m_threshold == 1`.
     for (size_t i = 0; i < bits_x64; i += 64) {
-      const auto value = m_LSP_mask.rlong(i);
+      auto value = m_LSP_mask.rlong(i);
+
+#if __cplusplus >= 202002L
+      while (value) {
+        auto j = std::countr_zero(value);
+        if (m_bit_buffer.rbit())
+          ++(m_coeff_buf[i + j]);
+        if (++read_pos == m_avail_bits)
+          goto INITIALIZE_NEWLY_FOUND_POINTS_LABEL;
+        value &= value - 1;
+      }
+#else
       for (size_t j = 0; j < 64; j++) {
         if ((value >> j) & uint64_t{1}) {
           if (m_bit_buffer.rbit())
@@ -393,6 +433,7 @@ void sperr::SPECK_INT<T>::m_refinement_pass_decode()
             goto INITIALIZE_NEWLY_FOUND_POINTS_LABEL;
         }
       }
+#endif
     }
     for (auto i = bits_x64; i < m_LSP_mask.size(); i++) {
       if (m_LSP_mask.rbit(i)) {
diff --git a/src/SPERR/src/SPERR3D_OMP_C.cpp b/src/SPERR/src/SPERR3D_OMP_C.cpp
index 22e71509..3ac128ab 100644
--- a/src/SPERR/src/SPERR3D_OMP_C.cpp
+++ b/src/SPERR/src/SPERR3D_OMP_C.cpp
@@ -259,7 +259,9 @@ auto sperr::SPERR3D_OMP_C::m_gather_chunk(const T* vol,
   // Will be subject to Named Return Value Optimization.
   return chunk_buf;
 }
-template auto sperr::SPERR3D_OMP_C::m_gather_chunk(const float*, dims_type, std::array<size_t, 6>)
-    -> vecd_type;
-template auto sperr::SPERR3D_OMP_C::m_gather_chunk(const double*, dims_type, std::array<size_t, 6>)
-    -> vecd_type;
+template auto sperr::SPERR3D_OMP_C::m_gather_chunk(const float*,
+                                                   dims_type,
+                                                   std::array<size_t, 6>) -> vecd_type;
+template auto sperr::SPERR3D_OMP_C::m_gather_chunk(const double*,
+                                                   dims_type,
+                                                   std::array<size_t, 6>) -> vecd_type;
diff --git a/src/SPERR/src/SPERR3D_OMP_D.cpp b/src/SPERR/src/SPERR3D_OMP_D.cpp
index 0546191c..a487bff0 100644
--- a/src/SPERR/src/SPERR3D_OMP_D.cpp
+++ b/src/SPERR/src/SPERR3D_OMP_D.cpp
@@ -144,6 +144,11 @@ auto sperr::SPERR3D_OMP_D::release_hierarchy() -> std::vector<vecd_type>&&
   return std::move(m_hierarchy);
 }
 
+auto sperr::SPERR3D_OMP_D::view_hierarchy() const -> const std::vector<vecd_type>&
+{
+  return m_hierarchy;  // By const reference; unlike release_hierarchy(), nothing is moved out.
+}
+
 auto sperr::SPERR3D_OMP_D::view_decoded_data() const -> const sperr::vecd_type&
 {
   return m_vol_buf;
diff --git a/src/SPERR/src/SPERR3D_Stream_Tools.cpp b/src/SPERR/src/SPERR3D_Stream_Tools.cpp
index 608045a4..2017b1ad 100644
--- a/src/SPERR/src/SPERR3D_Stream_Tools.cpp
+++ b/src/SPERR/src/SPERR3D_Stream_Tools.cpp
@@ -104,8 +104,8 @@ auto sperr::SPERR3D_Stream_Tools::get_stream_header(const void* p) const -> SPER
   return header;
 }
 
-auto sperr::SPERR3D_Stream_Tools::progressive_read(std::string filename, unsigned pct) const
-    -> vec8_type
+auto sperr::SPERR3D_Stream_Tools::progressive_read(const std::string& filename,
+                                                   unsigned pct) const -> vec8_type
 {
   // Read the header of this bitstream.
   auto vec20 = sperr::read_n_bytes(filename, 20);
diff --git a/src/SPERR/src/SPERR_C_API.cpp b/src/SPERR/src/SPERR_C_API.cpp
index 867fb681..0628372f 100644
--- a/src/SPERR/src/SPERR_C_API.cpp
+++ b/src/SPERR/src/SPERR_C_API.cpp
@@ -8,15 +8,15 @@
 
 #include "SPERR3D_Stream_Tools.h"
 
-int C_API::sperr_comp_2d(const void* src,
-                         int is_float,
-                         size_t dimx,
-                         size_t dimy,
-                         int mode,
-                         double quality,
-                         int out_inc_header,
-                         void** dst,
-                         size_t* dst_len)
+auto C_API::sperr_comp_2d(const void* src,
+                          int is_float,
+                          size_t dimx,
+                          size_t dimy,
+                          int mode,
+                          double quality,
+                          int out_inc_header,
+                          void** dst,
+                          size_t* dst_len) -> int
 {
   // Examine if `dst` is pointing to a NULL pointer
   if (*dst != nullptr)
@@ -95,12 +95,12 @@ int C_API::sperr_comp_2d(const void* src,
   return 0;
 }
 
-int C_API::sperr_decomp_2d(const void* src,
-                           size_t src_len,
-                           int output_float,
-                           size_t dimx,
-                           size_t dimy,
-                           void** dst)
+auto C_API::sperr_decomp_2d(const void* src,
+                            size_t src_len,
+                            int output_float,
+                            size_t dimx,
+                            size_t dimy,
+                            void** dst) -> int
 {
   // Examine if `dst` is pointing to a NULL pointer
   if (*dst != nullptr)
@@ -153,19 +153,19 @@ void C_API::sperr_parse_header(const void* src,
   *dimz = dims[2];
 }
 
-int C_API::sperr_comp_3d(const void* src,
-                         int is_float,
-                         size_t dimx,
-                         size_t dimy,
-                         size_t dimz,
-                         size_t chunk_x,
-                         size_t chunk_y,
-                         size_t chunk_z,
-                         int mode,
-                         double quality,
-                         size_t nthreads,
-                         void** dst,
-                         size_t* dst_len)
+auto C_API::sperr_comp_3d(const void* src,
+                          int is_float,
+                          size_t dimx,
+                          size_t dimy,
+                          size_t dimz,
+                          size_t chunk_x,
+                          size_t chunk_y,
+                          size_t chunk_z,
+                          int mode,
+                          double quality,
+                          size_t nthreads,
+                          void** dst,
+                          size_t* dst_len) -> int
 {
   // Examine if `dst` is pointing to a NULL pointer
   if (*dst != nullptr)
@@ -215,14 +215,14 @@ int C_API::sperr_comp_3d(const void* src,
   return 0;
 }
 
-int C_API::sperr_decomp_3d(const void* src,
-                           size_t src_len,
-                           int output_float,
-                           size_t nthreads,
-                           size_t* dimx,
-                           size_t* dimy,
-                           size_t* dimz,
-                           void** dst)
+auto C_API::sperr_decomp_3d(const void* src,
+                            size_t src_len,
+                            int output_float,
+                            size_t nthreads,
+                            size_t* dimx,
+                            size_t* dimy,
+                            size_t* dimz,
+                            void** dst) -> int
 {
   // Examine if `dst` is pointing to a NULL pointer.
   if (*dst != nullptr)
@@ -257,11 +257,11 @@ int C_API::sperr_decomp_3d(const void* src,
   return 0;
 }
 
-int C_API::sperr_trunc_3d(const void* src,
-                          size_t src_len,
-                          unsigned pct,
-                          void** dst,
-                          size_t* dst_len)
+auto C_API::sperr_trunc_3d(const void* src,
+                           size_t src_len,
+                           unsigned pct,
+                           void** dst,
+                           size_t* dst_len) -> int
 {
   if (*dst != nullptr)
     return 1;
diff --git a/src/SPERR/src/notes_on_clang-tidy b/src/SPERR/src/notes_on_clang-tidy
index 8d308687..59442704 100644
--- a/src/SPERR/src/notes_on_clang-tidy
+++ b/src/SPERR/src/notes_on_clang-tidy
@@ -5,4 +5,4 @@ cmake -DCMAKE_EXPORT_COMPILE_COMMANDS=ON .
 
 2. I used the following clang-tidy options:
 
-clang-tidy-10 ../src/SPECK3D.cpp -checks=-*,performance-*,portability-*,modernize-*,clang-analyzer-*,-modernize-avoid-c-arrays,-modernize-use-nodiscard  -header-filter=../include/* -fix
+clang-tidy ../src/SPECK3D.cpp -checks=-*,performance-*,portability-*,modernize-*,clang-analyzer-*,-modernize-avoid-c-arrays,-modernize-use-nodiscard  -header-filter=../include/* -fix
diff --git a/src/SPERR/src/sperr_helper.cpp b/src/SPERR/src/sperr_helper.cpp
index 6737495b..eb45d23f 100644
--- a/src/SPERR/src/sperr_helper.cpp
+++ b/src/SPERR/src/sperr_helper.cpp
@@ -90,10 +90,10 @@ auto sperr::coarsened_resolutions(dims_type vdim, dims_type cdim) -> std::vector
     auto nz = vdim[2] / cdim[2];
 
     resolutions = sperr::coarsened_resolutions(cdim);
-    for (size_t i = 0; i < resolutions.size(); i++) {
-      resolutions[i][0] *= nx;
-      resolutions[i][1] *= ny;
-      resolutions[i][2] *= nz;
+    for (auto& resolution : resolutions) {
+      resolution[0] *= nx;
+      resolution[1] *= ny;
+      resolution[2] *= nz;
     }
   }
 
@@ -333,8 +333,9 @@ auto sperr::write_n_bytes(std::string filename, size_t n_bytes, const void* buff
     return RTNType::Good;
 }
 
-auto sperr::read_sections(std::string filename, const std::vector<size_t>& sections, vec8_type& dst)
-    -> RTNType
+auto sperr::read_sections(std::string filename,
+                          const std::vector<size_t>& sections,
+                          vec8_type& dst) -> RTNType
 {
   // Calculate the farthest file location to be read.
   size_t far = 0;
@@ -494,8 +495,10 @@ auto sperr::calc_stats(const T* arr1, const T* arr2, size_t arr_len, size_t omp_
   return {rmse, linfty, psnr, arr1min, arr1max};
 }
 template auto sperr::calc_stats(const float*, const float*, size_t, size_t) -> std::array<float, 5>;
-template auto sperr::calc_stats(const double*, const double*, size_t, size_t)
-    -> std::array<double, 5>;
+template auto sperr::calc_stats(const double*,
+                                const double*,
+                                size_t,
+                                size_t) -> std::array<double, 5>;
 
 template <typename T>
 auto sperr::kahan_summation(const T* arr, size_t len) -> T
@@ -514,8 +517,8 @@ auto sperr::kahan_summation(const T* arr, size_t len) -> T
 template auto sperr::kahan_summation(const float*, size_t) -> float;
 template auto sperr::kahan_summation(const double*, size_t) -> double;
 
-auto sperr::chunk_volume(dims_type vol_dim, dims_type chunk_dim)
-    -> std::vector<std::array<size_t, 6>>
+auto sperr::chunk_volume(dims_type vol_dim,
+                         dims_type chunk_dim) -> std::vector<std::array<size_t, 6>>
 {
   // Step 1: figure out how many segments are there along each axis.
   auto n_segs = std::array<size_t, 3>();
diff --git a/src/SPERR/test_scripts/CMakeLists.txt b/src/SPERR/test_scripts/CMakeLists.txt
index 9e7d4f6f..0dbf6c50 100644
--- a/src/SPERR/test_scripts/CMakeLists.txt
+++ b/src/SPERR/test_scripts/CMakeLists.txt
@@ -1,29 +1,29 @@
 add_executable(        sperr_helper sperr_helper_unit_test.cpp )
-target_link_libraries( sperr_helper PUBLIC SPERR gtest_main )
+target_link_libraries( sperr_helper PUBLIC SPERR GTest::gtest_main )
 
 add_executable(        bitstream bitstream_unit_test.cpp )
-target_link_libraries( bitstream PUBLIC SPERR gtest_main )
+target_link_libraries( bitstream PUBLIC SPERR GTest::gtest_main )
 
 add_executable(        dwt dwt_unit_test.cpp )
-target_link_libraries( dwt PUBLIC SPERR gtest_main )
+target_link_libraries( dwt PUBLIC SPERR GTest::gtest_main )
 
 add_executable(        speck_int speck_int_unit_test.cpp )
-target_link_libraries( speck_int PUBLIC SPERR gtest_main )
+target_link_libraries( speck_int PUBLIC SPERR GTest::gtest_main )
 
 add_executable(        outlier_coder outlier_coder_unit_test.cpp )
-target_link_libraries( outlier_coder PUBLIC SPERR gtest_main )
+target_link_libraries( outlier_coder PUBLIC SPERR GTest::gtest_main )
 
 add_executable(        speck2d_flt speck2d_flt_unit_test.cpp )
-target_link_libraries( speck2d_flt PUBLIC SPERR gtest_main )
+target_link_libraries( speck2d_flt PUBLIC SPERR GTest::gtest_main )
 
 add_executable(        speck3d_flt speck3d_flt_unit_test.cpp )
-target_link_libraries( speck3d_flt PUBLIC SPERR gtest_main )
+target_link_libraries( speck3d_flt PUBLIC SPERR GTest::gtest_main )
 
 add_executable(        sperr3d_omp sperr3d_omp_unit_test.cpp )
-target_link_libraries( sperr3d_omp PUBLIC SPERR gtest_main )
+target_link_libraries( sperr3d_omp PUBLIC SPERR GTest::gtest_main )
 
 add_executable(        stream_tools stream_tools_unit_test.cpp )
-target_link_libraries( stream_tools PUBLIC SPERR gtest_main )
+target_link_libraries( stream_tools PUBLIC SPERR GTest::gtest_main )
 
 include(GoogleTest)
 gtest_discover_tests( sperr_helper )
diff --git a/src/SPERR/test_scripts/bitstream_unit_test.cpp b/src/SPERR/test_scripts/bitstream_unit_test.cpp
index 26b9b846..a9cfdc17 100644
--- a/src/SPERR/test_scripts/bitstream_unit_test.cpp
+++ b/src/SPERR/test_scripts/bitstream_unit_test.cpp
@@ -360,7 +360,68 @@ TEST(Bitmask, BufferTransfer)
     EXPECT_EQ(src.rbit(i), dst.rbit(i));
 }
 
-#if defined __cpp_lib_three_way_comparison && defined __cpp_impl_three_way_comparison
+TEST(Bitmask, has_true)
+{
+  const size_t mask_size = 210;  // 3 * 64 + 18; presumably sized to end in a partial word (TODO confirm)
+
+  // For every position idx: build a fresh mask and call wtrue(idx) on it (presumably sets that bit).
+  for (size_t idx = 0; idx < mask_size; idx++) {
+    auto mask = Mask(mask_size);
+    mask.wtrue(idx);
+
+    // Loop over all starting positions of the queried range.
+    for (size_t start = 0; start < mask_size; start++) {
+
+      // Loop over all range lengths; the queried range is [start, start + len).
+      for (size_t len = 0; len < mask_size - start; len++) {  // NOTE(review): `<` never covers the last bit; `<=` meant?
+        auto ans1 = mask.has_true<false>(start, len);
+        auto ans2 = -1l;  // Reference answer via rbit(): 1 if any bit in the range is set, else -1.
+        for (size_t i = start; i < start + len; i++)
+          if (mask.rbit(i)) {
+            ans2 = 1;
+            break;
+          }
+        EXPECT_EQ(ans1, ans2);
+      }
+
+    }
+  }
+}
+
+TEST(Bitmask, has_true_position)
+{
+  const size_t mask_size = 210;  // 3 * 64 + 18; presumably sized to end in a partial word (TODO confirm)
+
+  // For every position idx: build a fresh mask and call wtrue(idx) on it (presumably sets that bit).
+  for (size_t idx = 0; idx < mask_size; idx++) {
+    auto mask = Mask(mask_size);
+    mask.wtrue(idx);
+
+    // Loop over all starting positions of the queried range.
+    for (size_t start = 0; start < mask_size; start++) {
+
+      // Loop over all range lengths; the queried range is [start, start + len).
+      for (size_t len = 0; len < mask_size - start; len++) {  // NOTE(review): `<` never covers the last bit; `<=` meant?
+        auto ans1 = mask.has_true<true>(start, len);
+        auto ans2 = -1l;  // Reference answer via rbit(): offset of the first set bit from start, else -1.
+        for (size_t i = start; i < start + len; i++)
+          if (mask.rbit(i)) {
+            ans2 = i - start;
+            break;
+          }
+        EXPECT_EQ(ans1, ans2) << "idx = " << idx << ", start = " << start << ", len = " << len << std::endl;
+        if (ans1 != ans2)
+          goto END_LABEL;  // Stop at the first mismatch by jumping out of all three loops.
+      }
+
+    }
+  }
+
+END_LABEL:
+  {}  // Empty block so that the label has a statement to attach to.
+}
+
+#if __cplusplus >= 201907L && defined __cpp_lib_three_way_comparison
 TEST(Bitmask, spaceship)
 {
   auto src = Mask(60);
diff --git a/src/SPERR/utilities/double_prec.cpp b/src/SPERR/utilities/raw_tools/double_prec.cpp
similarity index 100%
rename from src/SPERR/utilities/double_prec.cpp
rename to src/SPERR/utilities/raw_tools/double_prec.cpp
diff --git a/src/SPERR/utilities/show_version.cpp b/src/SPERR/utilities/show_version.cpp
index cd894588..d1e23358 100644
--- a/src/SPERR/utilities/show_version.cpp
+++ b/src/SPERR/utilities/show_version.cpp
@@ -8,4 +8,5 @@ int main()
             << SPERR_VERSION_PATCH << std::endl;
   std::cout << "Based on code Branch: " << SPERR_GIT_BRANCH << std::endl;
   std::cout << "Based on code SHA1  : " << SPERR_GIT_SHA1 << std::endl;
+  std::cout << "C++ Standard Support: " << __cplusplus << std::endl;
 }
diff --git a/src/SPERR/utilities/sperr2d.cpp b/src/SPERR/utilities/sperr2d.cpp
index aa557f91..89a7313b 100644
--- a/src/SPERR/utilities/sperr2d.cpp
+++ b/src/SPERR/utilities/sperr2d.cpp
@@ -130,6 +130,7 @@ int main(int argc, char* argv[])
   //
   auto bitstream = std::string();
   app.add_option("--bitstream", bitstream, "Output compressed bitstream.")
+      ->needs(cptr)
       ->group("Output settings");
 
   auto decomp_f32 = std::string();
@@ -188,6 +189,10 @@ int main(int argc, char* argv[])
   //
   // A little extra sanity check.
   //
+  if (input_file.empty()) {
+    std::cout << "What's the input file?" << std::endl;
+    return __LINE__;
+  }
   if (!cflag && !dflag) {
     std::cout << "Is this compressing (-c) or decompressing (-d) ?" << std::endl;
     return __LINE__;
@@ -215,6 +220,13 @@ int main(int argc, char* argv[])
     std::cout << "SPERR needs an output destination when decoding!" << std::endl;
     return __LINE__;
   }
+  // Print a warning message if there's no output specified
+  if (cflag && bitstream.empty())
+    std::cout << "Warning: no output file provided. Consider using --bitstream option."
+              << std::endl;
+  if (dflag && decomp_f64.empty() && decomp_f32.empty() && decomp_lowres_f64.empty() &&
+      decomp_lowres_f32.empty())
+    std::cout << "Warning: no output file provided." << std::endl;
 
   //
   // Really starting the real work!
@@ -228,7 +240,7 @@ int main(int argc, char* argv[])
     if ((ftype == 32 && (total_vals * 4 != input.size())) ||
         (ftype == 64 && (total_vals * 8 != input.size()))) {
       std::cout << "Input file size wrong!" << std::endl;
-      return __LINE__;
+      return __LINE__ % 256;
     }
     auto encoder = std::make_unique<sperr::SPECK2D_FLT>();
     encoder->set_dims(dims);
@@ -259,7 +271,7 @@ int main(int argc, char* argv[])
     auto rtn = encoder->compress();
     if (rtn != sperr::RTNType::Good) {
       std::cout << "Compression failed!" << std::endl;
-      return __LINE__;
+      return __LINE__ % 256;
     }
 
     // Assemble the output bitstream.
diff --git a/src/SPERR/utilities/sperr3d.cpp b/src/SPERR/utilities/sperr3d.cpp
index bb4ffc1d..b23bf66a 100644
--- a/src/SPERR/utilities/sperr3d.cpp
+++ b/src/SPERR/utilities/sperr3d.cpp
@@ -12,8 +12,9 @@
 
 // This functions takes in a filename, and a full resolution. It then creates a list of
 // filenames, each has the coarsened resolution appended.
-auto create_filenames(std::string name, sperr::dims_type vdims, sperr::dims_type cdims)
-    -> std::vector<std::string>
+auto create_filenames(std::string name,
+                      sperr::dims_type vdims,
+                      sperr::dims_type cdims) -> std::vector<std::string>
 {
   auto filenames = std::vector<std::string>();
   auto resolutions = sperr::coarsened_resolutions(vdims, cdims);
@@ -141,6 +142,7 @@ int main(int argc, char* argv[])
   //
   auto bitstream = std::string();
   app.add_option("--bitstream", bitstream, "Output compressed bitstream.")
+      ->needs(cptr)
       ->group("Output settings");
 
   auto decomp_f32 = std::string();
@@ -205,6 +207,10 @@ int main(int argc, char* argv[])
   //
   // A little extra sanity check.
   //
+  if (input_file.empty()) {
+    std::cout << "What's the input file?" << std::endl;
+    return __LINE__;
+  }
   if (!cflag && !dflag) {
     std::cout << "Is this compressing (-c) or decompressing (-d) ?" << std::endl;
     return __LINE__;
@@ -232,6 +238,30 @@ int main(int argc, char* argv[])
     std::cout << "SPERR needs an output destination when decoding!" << std::endl;
     return __LINE__;
   }
+  // Also check if the chunk dims can support multi-resolution decoding.
+  if (cflag && (!decomp_lowres_f64.empty() || !decomp_lowres_f32.empty())) {
+    auto name = decomp_lowres_f64;
+    if (name.empty())
+      name = decomp_lowres_f32;
+    assert(!name.empty());
+    auto filenames = create_filenames(name, dims, chunks);
+    if (filenames.empty()) {
+      std::printf(
+          " Warning: the combo of volume dimension (%lu, %lu, %lu) and chunk dimension"
+          " (%lu, %lu, %lu)\n cannot support multi-resolution decoding. "
+          " Try to use chunk dimensions that\n are similar in length and"
+          " can divide the volume dimension.\n",
+          dims[0], dims[1], dims[2], chunks[0], chunks[1], chunks[2]);
+      return __LINE__ % 256;
+    }
+  }
+  // Print a warning message if there's no output specified
+  if (cflag && bitstream.empty())
+    std::cout << "Warning: no output file provided. Consider using --bitstream option."
+              << std::endl;
+  if (dflag && decomp_f64.empty() && decomp_f32.empty() && decomp_lowres_f64.empty() &&
+      decomp_lowres_f32.empty())
+    std::cout << "Warning: no output file provided." << std::endl;
 
   //
   // Really starting the real work!
@@ -242,7 +272,7 @@ int main(int argc, char* argv[])
     if ((ftype == 32 && (total_vals * 4 != input.size())) ||
         (ftype == 64 && (total_vals * 8 != input.size()))) {
       std::cout << "Input file size wrong!" << std::endl;
-      return __LINE__;
+      return __LINE__ % 256;
     }
     auto encoder = std::make_unique<sperr::SPERR3D_OMP_C>();
     encoder->set_dims_and_chunks(dims, chunks);