Skip to content

Commit

Permalink
improve polar extent panning performance
Browse files Browse the repository at this point in the history
this is nearly a full rewrite of this component

- The calculation of the weighting function is vastly simplified -- in
  most cases it's just a dot product and a couple of comparisons.

- There is a parallel (SIMD) implementation of this which operates on
  multiple points at once.

  - Processing more than one point at once breaks the interface between
    the weighting function and the spreading panner, so that interface is
    removed.
    Instead the separation is between a 'core' which calculates some gains
    based on some internal parameters, the point positions and point
    gains, and the rest of it which sets up these parameters and
    post-processes the gains.

  - There are multiple implementations of the 'core', one for scalars and
    one for each supported instruction set, and these are chosen at
    run-time.

  - If all points in a batch have a gain of 1, a pre-computed sum of all
    gains in this batch can be added to the result, which makes the
    performance less dependent on the extent size.

- The old implementation is kept as part of the tests as a reference to
  compare against, because it's known-good and this is hard to test.

- The interface is changed to avoid allocation.

- Single-precision floats are used instead of doubles for calculating
  the weights and summing the gains, as the difference in results is
  negligible.
  • Loading branch information
tomjnixon committed May 2, 2023
1 parent 603314f commit 10e0717
Show file tree
Hide file tree
Showing 18 changed files with 1,326 additions and 443 deletions.
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ option(EAR_EXAMPLES "Build examples" ${IS_ROOT_PROJECT})
# User-facing build switches. EAR_PACKAGE_AND_INSTALL takes its default from
# IS_ROOT_PROJECT; the remaining switches default to TRUE.
option(EAR_PACKAGE_AND_INSTALL "Package and install libear" ${IS_ROOT_PROJECT})
# Choose between the bundled copy of a dependency and one located with
# find_package().
option(EAR_USE_INTERNAL_EIGEN "should we use our own version of Eigen, or find one with find_package?" TRUE)
option(EAR_USE_INTERNAL_XSIMD "should we use our own version of xsimd, or find one with find_package?" TRUE)
# Gates the per-instruction-set builds set up in src/CMakeLists.txt.
option(EAR_SIMD "try to use SIMD extensions" TRUE)
# Install destinations, stored in the cache as PATH entries so they can be
# overridden at configure time.
set(INSTALL_LIB_DIR lib CACHE PATH "Installation directory for libraries")
set(INSTALL_BIN_DIR bin CACHE PATH "Installation directory for executables")
set(INSTALL_INCLUDE_DIR include CACHE PATH "Installation directory for header files")
Expand Down Expand Up @@ -100,6 +101,7 @@ add_feature_info(EAR_EXAMPLES ${EAR_EXAMPLES} "Build examples")
# Register the current state of each build option as a named feature, so that
# it is reported by feature_summary() below.
add_feature_info(EAR_PACKAGE_AND_INSTALL ${EAR_PACKAGE_AND_INSTALL} "Package and install libear")
add_feature_info(EAR_USE_INTERNAL_EIGEN ${EAR_USE_INTERNAL_EIGEN} "use internal version of Eigen")
add_feature_info(EAR_USE_INTERNAL_XSIMD ${EAR_USE_INTERNAL_XSIMD} "use internal version of xsimd")
add_feature_info(EAR_SIMD ${EAR_SIMD} "try to use SIMD extensions")
# Print the summary for every category (WHAT ALL).
feature_summary(WHAT ALL)

if(EAR_PACKAGE_AND_INSTALL)
Expand Down
60 changes: 59 additions & 1 deletion src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,9 @@ add_library(ear
hoa/hoa.cpp
layout.cpp
screen.cpp
object_based/extent.cpp
object_based/polar_extent.cpp
object_based/polar_extent_scalar.cpp
object_based/polar_extent_simd.cpp
object_based/gain_calculator_objects.cpp
warnings.cpp
${CMAKE_CURRENT_BINARY_DIR}/resources.hpp
Expand Down Expand Up @@ -81,6 +83,62 @@ else()
endif()
set_target_properties(ear PROPERTIES CXX_EXTENSIONS OFF)

# Provides check_compiler_flag(), which add_arch_lib() uses to test whether
# the compiler accepts the flags for an instruction set.
include(CheckCompilerFlag)

# Sources which add_arch_lib() compiles once for each instruction set.
set(PER_ARCH_SOURCES object_based/polar_extent_simd_instance.cpp)
# add_arch_lib(<TARGET> <ARCH> <FLAGS>)
#
# Compile PER_ARCH_SOURCES a second time for one instruction set and add the
# resulting object files to an existing target.
#
#   TARGET  target which receives the objects; the object library created
#           here is named <TARGET>_<ARCH>
#   ARCH    architecture name within the xsimd namespace; the sources see it
#           as the definition XSIMD_ARCH=xsimd::<ARCH>
#   FLAGS   ;-separated compiler flags selecting the instruction set (may be
#           empty)
#
# If the C++ compiler does not accept FLAGS, no library is created. In both
# cases XSIMD_ARCHS is written to the caller's scope, with xsimd::<ARCH>
# appended only when the flags were accepted.
function(add_arch_lib TARGET ARCH FLAGS)
  set(arch_objects ${TARGET}_${ARCH})
  # check results are cached, so each architecture needs its own variable
  set(flag_check ear_has_flag_${ARCH})

  check_compiler_flag(CXX "${FLAGS}" ${flag_check})

  if(${flag_check})
    add_library(${arch_objects} OBJECT ${PER_ARCH_SOURCES})
    target_sources(${TARGET} PRIVATE $<TARGET_OBJECTS:${arch_objects}>)

    # the per-arch objects should be built like the rest of TARGET, so mirror
    # every one of these properties which TARGET has a value for
    set(mirrored_properties
      COMPILE_DEFINITIONS
      COMPILE_FEATURES
      INCLUDE_DIRECTORIES
      LINK_LIBRARIES
      POSITION_INDEPENDENT_CODE
      CXX_VISIBILITY_PRESET
      C_VISIBILITY_PRESET
      VISIBILITY_INLINES_HIDDEN
    )
    foreach(property_name IN LISTS mirrored_properties)
      get_target_property(property_value ${TARGET} ${property_name})
      # get_target_property stores <variable>-NOTFOUND for a missing property
      if(NOT property_value STREQUAL "property_value-NOTFOUND")
        set_property(TARGET ${arch_objects} PROPERTY ${property_name} ${property_value})
      endif()
    endforeach()

    # arch-specific settings go on top of the mirrored ones
    target_compile_options(${arch_objects} PRIVATE ${FLAGS})
    target_compile_definitions(${arch_objects} PRIVATE XSIMD_ARCH=xsimd::${ARCH})

    list(APPEND XSIMD_ARCHS xsimd::${ARCH})
  endif()

  # the list was only modified in this function's scope; hand it back
  set(XSIMD_ARCHS "${XSIMD_ARCHS}" PARENT_SCOPE)
endfunction()

if(EAR_SIMD)
  # Ordering is significant: within each branch, add_arch_lib has to be
  # invoked for the best instruction set first and the worst one last.
  if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|AMD64|X86|i686")
    if(NOT MSVC)
      add_arch_lib(ear avx512bw "-mavx512bw;-mavx512dq;-mavx512cd;-mavx512f;-mfma;-mavx2;-mavx")
      add_arch_lib(ear avx2_fma "-mfma;-mavx2;-mavx")
      add_arch_lib(ear avx "-mavx")
      add_arch_lib(ear sse4_2 "-msse4.2")
    else()
      add_arch_lib(ear avx512bw "/arch:AVX512")
      add_arch_lib(ear avx2_fma "/arch:AVX2")
      add_arch_lib(ear avx "/arch:AVX")
    endif()
    # finally, a build with no additional flags
    add_arch_lib(ear default_arch "")
  elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "ARM.*|arm.*|aarch64.*")
    # On arm, floating-point extension support is closely tied to the ISA, so
    # flags are generally not needed: only the default and the fallback are
    # built.
    #
    # This can go wrong when the compiler does not actually support neon, but
    # that is tricky to detect.
    add_arch_lib(ear default_arch "")
  endif()
endif()

# The generic implementation is always added, after any architectures
# collected by add_arch_lib.
list(APPEND XSIMD_ARCHS xsimd::generic_for_dispatch)

# Pass the whole list to the sources of ear as one comma-separated definition.
string(JOIN "," XSIMD_ARCHS_STR ${XSIMD_ARCHS})
target_compile_definitions(ear PRIVATE "XSIMD_ARCHS=${XSIMD_ARCHS_STR}")

include(GenerateExportHeader)
generate_export_header(ear
EXPORT_FILE_NAME ${PROJECT_BINARY_DIR}/generated/export.hpp
Expand Down
22 changes: 22 additions & 0 deletions src/common/helpers/xsimd_extension.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#pragma once
#include <xsimd/types/xsimd_all_registers.hpp>

namespace xsimd {
// Single-identifier names for architectures which are passed in through
// preprocessor defines; spelling the template there runs into quoting
// problems.
typedef fma3<avx2> avx2_fma;

// Variant of the generic architecture which can be used with xsimd::dispatch.
//
// It always reports itself as supported and available, has version 0.0.0,
// and neither requires alignment nor has one (alignment() is 0).
// NOTE(review): presumably these are the members xsimd::dispatch expects of
// an architecture type -- confirm against the xsimd version in use.
struct generic_for_dispatch : generic {
  static constexpr const char* name() noexcept {
    return "generic_for_dispatch";
  }

  // the zero-argument overload hides generic::version, hence the
  // qualified call
  static constexpr unsigned int version() noexcept {
    return generic::version(0, 0, 0);
  }

  static constexpr bool supported() noexcept { return true; }
  static constexpr bool available() noexcept { return true; }

  static constexpr bool requires_alignment() noexcept { return false; }
  static constexpr std::size_t alignment() noexcept { return 0; }
};
}  // namespace xsimd
242 changes: 0 additions & 242 deletions src/object_based/extent.cpp

This file was deleted.

Loading

0 comments on commit 10e0717

Please sign in to comment.