Skip to content

Commit

Permalink
Merge branch 'master' into cuda_12.4_dep
Browse files Browse the repository at this point in the history
  • Loading branch information
jaycedowell committed Sep 5, 2024
2 parents 790c8f5 + 970dd02 commit 168737b
Show file tree
Hide file tree
Showing 73 changed files with 11,210 additions and 1,628 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ jobs:
strategy:
matrix:
os: [self-hosted, ubuntu-latest, macos-latest]
python-version: ['3.8', '3.10']
python-version: ['3.8', '3.10', '3.12']
include:
- os: ubuntu-20.04
python-version: '3.6'
Expand All @@ -39,6 +39,7 @@ jobs:
exuberant-ctags \
gfortran \
git \
libhwloc-dev \
libopenblas-dev \
pkg-config \
software-properties-common
Expand All @@ -50,6 +51,7 @@ jobs:
ctags-exuberant \
gawk \
gnu-sed \
hwloc \
pkg-config
- uses: actions/setup-python@v5.0.0
with:
Expand Down Expand Up @@ -110,8 +112,6 @@ jobs:
coverage run --source=bifrost.ring,bifrost,bifrost.pipeline test_fft.py
coverage run --source=bifrost.ring,bifrost,bifrost.pipeline your_first_block.py
python download_breakthrough_listen_data.py -y
coverage run --source=bifrost.ring,bifrost,bifrost.pipeline test_guppi.py
coverage run --source=bifrost.ring,bifrost,bifrost.pipeline test_guppi_reader.py
coverage run --source=bifrost.ring,bifrost,bifrost.pipeline test_fdmt.py ./testdata/pulsars/blc0_guppi_57407_61054_PSR_J1840%2B5640_0004.fil
coverage xml
- name: "Upload Coverage"
Expand Down
13 changes: 11 additions & 2 deletions Makefile.in
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,12 @@ DAT_DIR = share
SRC_DIR = src

HAVE_PYTHON = @HAVE_PYTHON@

HAVE_DOCKER = @HAVE_DOCKER@

CAN_BUILD_CXX_DOCS = @HAVE_CXX_DOCS@
CAN_BUILD_PYTHON_DOCS = @HAVE_PYTHON_DOCS@

BIFROST_PYTHON_DIR = python

all: libbifrost python
Expand All @@ -34,7 +38,7 @@ endif
test:
#$(MAKE) -C $(SRC_DIR) test
ifeq ($(HAVE_PYTHON),1)
cd test && ./download_test_data.sh ; python -m unittest discover
cd test && ./download_test_data.sh ; @PYTHON@ -m unittest discover -v
endif
.PHONY: test
clean:
Expand Down Expand Up @@ -76,8 +80,13 @@ ifeq ($(HAVE_PYTHON),1)
endif
.PHONY: uninstall

doc: $(INC_DIR)/bifrost/*.h Doxyfile
doc: $(INC_DIR)/bifrost/*.h Doxyfile docs/source/*.rst docs/source/*.py
ifeq ($(CAN_BUILD_CXX_DOCS),1)
@DX_DOXYGEN@ Doxyfile
endif
ifeq ($(CAN_BUILD_PYTHON_DOCS),1)
$(MAKE) -C docs singlehtml
endif
.PHONY: doc

python: libbifrost
Expand Down
21 changes: 20 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -95,10 +95,20 @@ print "All done"
**For a quick demo which you can run in-browser without installation,
go to the following [link](https://colab.research.google.com/github/ledatelescope/bifrost/blob/master/BifrostDemo.ipynb).**

### CUDA

CUDA is available at https://developer.nvidia.com/cuda-downloads. You can check the
["Getting Started guide"](http://ledatelescope.github.io/bifrost/Getting-started-guide.html)
in the docs to see which versions of the CUDA toolkit have been confirmed to work with Bifrost.

### C Dependencies

If using Ubuntu or another Debian-based Linux distribution:

$ sudo apt-get install exuberant-ctags

Otherwise check https://ctags.sourceforge.net/ for install instructions.

### Python Dependencies

* numpy
Expand Down Expand Up @@ -171,8 +181,17 @@ your machine.

### Building the Docs from Scratch

Install sphinx and breathe using pip, and also install Doxygen.
Install breathe and sphinx using pip:

$ sudo pip install breathe sphinx

Also install Doxygen using your package manager.
In Ubuntu, for example:

$ sudo apt-get install doxygen

Otherwise check https://www.doxygen.nl/ for Doxygen install instructions.

Doxygen documentation can be generated by running:

$ make doc
Expand Down
72 changes: 48 additions & 24 deletions config/cuda.m4
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@ AC_DEFUN([AX_CHECK_CUDA],
[enable_cuda=no],
[enable_cuda=yes])
NVCCLIBS=""
ac_compile_save="$ac_compile"
ac_link_save="$ac_link"
ac_run_save="$ac_run"
AC_SUBST([HAVE_CUDA], [0])
AC_SUBST([CUDA_VERSION], [0])
AC_SUBST([CUDA_HAVE_CXX20], [0])
Expand All @@ -38,12 +43,12 @@ AC_DEFUN([AX_CHECK_CUDA],
CXXFLAGS_save="$CXXFLAGS"
LDFLAGS_save="$LDFLAGS"
LIBS_save="$LIBS"
NVCCLIBS_save="$NVCCLIBS"
ac_ext_save="$ac_ext"
ac_compile='$NVCC -c $NVCCFLAGS conftest.$ac_ext >&5'
LDFLAGS="-L$CUDA_HOME/lib64 -L$CUDA_HOME/lib"
LIBS="$LIBS -lcuda -lcudart"
NVCCLIBS="$LIBS -lcuda -lcudart"
ac_ext="cu"
ac_link='$NVCC -o conftest$ac_exeext $NVCCFLAGS $LDFLAGS $LIBS conftest.$ac_ext >&5'
Expand All @@ -52,19 +57,31 @@ AC_DEFUN([AX_CHECK_CUDA],
#include <cuda.h>
#include <cuda_runtime.h>]],
[[cudaMalloc(0, 0);]])],
[CUDA_VERSION=$( ${NVCC} --version | ${GREP} -Po -e "release.*," | cut -d, -f1 | cut -d\ -f2 )
CUDA_MAJOR=$( echo "${CUDA_VERSION}" | cut -d. -f1 )
if test "${CUDA_MAJOR}" -ge 10; then
AC_MSG_RESULT(yes - v$CUDA_VERSION)
else
AC_MSG_RESULT(no - found v$CUDA_VERSION)
fi],
[AC_MSG_RESULT(no - build failure)
AC_SUBST([HAVE_CUDA], [0])])
[],
[AC_SUBST([HAVE_CUDA], [0])])
if test "$HAVE_CUDA" = "1"; then
LDFLAGS="-L$CUDA_HOME/lib64 -L$CUDA_HOME/lib"
NVCCLIBS="$NVCCLIBS -lcuda -lcudart"
ac_link='$NVCC -o conftest$ac_exeext $NVCCFLAGS $LDFLAGS $LIBS $NVCCLIBS conftest.$ac_ext >&5'
AC_LINK_IFELSE([
AC_LANG_PROGRAM([[
#include <cuda.h>
#include <cuda_runtime.h>]],
[[cudaMalloc(0, 0);]])],
[CUDA_VERSION=$( ${NVCC} --version | ${GREP} -Po -e "release.*," | cut -d, -f1 | cut -d\ -f2 )
AC_MSG_RESULT(yes - v$CUDA_VERSION)],
[AC_MSG_RESULT(no)
AC_SUBST([HAVE_CUDA], [0])])
else
AC_MSG_RESULT(no)
AC_SUBST([HAVE_CUDA], [0])
fi
CXXFLAGS="$CXXFLAGS_save"
LDFLAGS="$LDFLAGS_save"
LIBS="$LIBS_save"
NVCCLIBS="$NVCCLIBS_save"
ac_ext="$ac_ext_save"
fi
Expand Down Expand Up @@ -134,7 +151,7 @@ AC_DEFUN([AX_CHECK_CUDA],
CXXFLAGS="$CXXFLAGS -DBF_CUDA_ENABLED=1"
NVCCFLAGS="$NVCCFLAGS -DBF_CUDA_ENABLED=1"
LDFLAGS="$LDFLAGS -L$CUDA_HOME/lib64 -L$CUDA_HOME/lib"
LIBS="$LIBS -lcuda -lcudart -lnvrtc -lcublas -lcudadevrt -L. -lcufft_static_pruned -lculibos -lnvToolsExt"
NVCCLIBS="$NVCCLIBS -lcuda -lcudart -lnvrtc -lcublas -lcudadevrt -L. -lcufft_static_pruned -lculibos -lnvToolsExt"
fi
AC_ARG_WITH([gpu_archs],
Expand All @@ -153,13 +170,13 @@ AC_DEFUN([AX_CHECK_CUDA],
CXXFLAGS_save="$CXXFLAGS"
LDFLAGS_save="$LDFLAGS"
LIBS_save="$LIBS"
NVCCLIBS_save="$NVCCLIBS"
ac_ext_save="$ac_ext"
LDFLAGS="-L$CUDA_HOME/lib64 -L$CUDA_HOME/lib"
LIBS="-lcuda -lcudart"
NVCCLIBS="-lcuda -lcudart"
ax_ext="cu"
ac_run='$NVCC -o conftest$ac_ext $LDFLAGS $LIBS conftest.$ac_ext>&5'
ac_run='$NVCC -o conftest$ac_ext $LDFLAGS $LIBS $NVCCLIBS conftest.$ac_ext>&5'
AC_RUN_IFELSE([
AC_LANG_PROGRAM([[
#include <cuda.h>
Expand Down Expand Up @@ -209,7 +226,7 @@ AC_DEFUN([AX_CHECK_CUDA],
CXXFLAGS="$CXXFLAGS_save"
LDFLAGS="$LDFLAGS_save"
LIBS="$LIBS_save"
NVCCLIBS="$NVCCLIBS_save"
ac_ext="$ac_ext_save"
else
AC_SUBST([GPU_ARCHS], [$with_gpu_archs])
Expand Down Expand Up @@ -240,11 +257,11 @@ AC_DEFUN([AX_CHECK_CUDA],
CXXFLAGS_save="$CXXFLAGS"
LDFLAGS_save="$LDFLAGS"
LIBS_save="$LIBS"
NVCCLIBS_save="$NVCCLIBS"
ac_ext_save="$ac_ext"
LDFLAGS="-L$CUDA_HOME/lib64 -L$CUDA_HOME/lib"
LIBS="-lcuda -lcudart"
NVCCLIBS="-lcuda -lcudart"
ac_ext="cu"
ac_run='$NVCC -o conftest$ac_ext $LDFLAGS $LIBS conftest.$ac_ext>&5'
AC_RUN_IFELSE([
Expand Down Expand Up @@ -283,7 +300,7 @@ AC_DEFUN([AX_CHECK_CUDA],
CXXFLAGS="$CXXFLAGS_save"
LDFLAGS="$LDFLAGS_save"
LIBS="$LIBS_save"
NVCCLIBS="$NVCCLIBS_save"
ac_ext="$ac_ext_save"
else
AC_SUBST([GPU_SHAREDMEM], [$with_shared_mem])
Expand All @@ -302,13 +319,13 @@ AC_DEFUN([AX_CHECK_CUDA],
AC_MSG_CHECKING([for thrust pinned allocated support])
CXXFLAGS_save="$CXXFLAGS"
LDFLAGS_save="$LDFLAGS"
LIBS_save="$LIBS"
NVCCLIBS_save="$NVCCLIBS"
ac_ext_save="$ac_ext"
LDFLAGS="-L$CUDA_HOME/lib64 -L$CUDA_HOME/lib"
LIBS="-lcuda -lcudart"
NVCCLIBS="-lcuda -lcudart"
ac_ext="cu"
ac_run='$NVCC -o conftest$ac_ext $LDFLAGS $LIBS conftest.$ac_ext>&5'
ac_run='$NVCC -o conftest$ac_ext $LDFLAGS $LIBS $NVCCLIBS conftest.$ac_ext>&5'
AC_RUN_IFELSE([
AC_LANG_PROGRAM([[
#include <cuda.h>
Expand All @@ -322,7 +339,14 @@ AC_DEFUN([AX_CHECK_CUDA],
CXXFLAGS="$CXXFLAGS_save"
LDFLAGS="$LDFLAGS_save"
LIBS="$LIBS_save"
NVCCLIBS="$NVCCLIBS_save"
ac_ext="$ac_ext_save"
else
AC_SUBST([GPU_PASCAL_MANAGEDMEM], [0])
AC_SUBST([GPU_EXP_PINNED_ALLOC], [1])
fi
ac_compile="$ac_compile_save"
ac_link="$ac_link_save"
ac_run="$ac_run_save"
])
121 changes: 121 additions & 0 deletions config/intrinsics.m4
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
#
# SSE
#
# AX_CHECK_SSE
#   Registers the --disable-sse configure switch and, when SSE has not been
#   turned off, runs a small SSE test program through AC_RUN_IFELSE.
#   On success '-msse' is appended to CXXFLAGS and HAVE_SSE is substituted
#   as 1; in every other case HAVE_SSE is substituted as 0.
#

AC_DEFUN([AX_CHECK_SSE],
[
  AC_PROVIDE([AX_CHECK_SSE])
  # Honor the value the user passed: the action-if-given branch also runs for
  # an explicit --enable-sse, so it must not unconditionally disable the check.
  AC_ARG_ENABLE([sse],
                [AS_HELP_STRING([--disable-sse],
                                [disable SSE support (default=no)])],
                [enable_sse="$enableval"],
                [enable_sse=yes])

  AC_SUBST([HAVE_SSE], [0])

  if test "$enable_sse" = "yes"; then
    AC_MSG_CHECKING([for SSE support via '-msse'])

    CXXFLAGS_temp="$CXXFLAGS -msse"

    # Save ac_run so the override below does not leak into later checks;
    # this mirrors what the AVX and AVX512 checks in this file do.
    # NOTE(review): confirm that AC_RUN_IFELSE actually consults ac_run - if it
    # does not then CXXFLAGS_temp never reaches the trial build.
    ac_run_save="$ac_run"
    ac_run="$CXX -o conftest$ac_ext $CXXFLAGS_temp conftest.$ac_ext>&5"
    AC_RUN_IFELSE([
      AC_LANG_PROGRAM([[
          #include <xmmintrin.h>]],
          [[
          __m128 x = _mm_set1_ps(1.0f);
          x = _mm_add_ps(x, x);
          return _mm_cvtss_f32(x) != 2.0f;]])],
      [
       CXXFLAGS="$CXXFLAGS -msse"
       AC_SUBST([HAVE_SSE], [1])
       AC_MSG_RESULT([yes])
      ], [
       AC_MSG_RESULT([no])
      ])

    ac_run="$ac_run_save"
  fi
])



#
# AVX
#
# AX_CHECK_AVX
#   Registers the --disable-avx configure switch and, when AVX has not been
#   turned off, runs a small AVX test program through AC_RUN_IFELSE.
#   On success '-mavx' is appended to CXXFLAGS and HAVE_AVX is substituted
#   as 1; in every other case HAVE_AVX is substituted as 0.
#

AC_DEFUN([AX_CHECK_AVX],
[
  AC_PROVIDE([AX_CHECK_AVX])
  # Honor the value the user passed: the action-if-given branch also runs for
  # an explicit --enable-avx, so it must not unconditionally disable the check.
  AC_ARG_ENABLE([avx],
                [AS_HELP_STRING([--disable-avx],
                                [disable AVX support (default=no)])],
                [enable_avx="$enableval"],
                [enable_avx=yes])

  AC_SUBST([HAVE_AVX], [0])

  if test "$enable_avx" = "yes"; then
    AC_MSG_CHECKING([for AVX support via '-mavx'])

    CXXFLAGS_temp="$CXXFLAGS -mavx"

    # ac_run is overridden only for this check and restored afterwards.
    # NOTE(review): confirm that AC_RUN_IFELSE actually consults ac_run - if it
    # does not then CXXFLAGS_temp never reaches the trial build.
    ac_run_save="$ac_run"
    ac_run="$CXX -o conftest$ac_ext $CXXFLAGS_temp conftest.$ac_ext>&5"
    AC_RUN_IFELSE([
      AC_LANG_PROGRAM([[
          #include <immintrin.h>]],
          [[
          __m256d x = _mm256_set1_pd(1.0);
          x = _mm256_add_pd(x, x);
          return _mm256_cvtsd_f64(x) != 2.0;]])],
      [
       CXXFLAGS="$CXXFLAGS -mavx"
       AC_SUBST([HAVE_AVX], [1])
       AC_MSG_RESULT([yes])
      ], [
       AC_MSG_RESULT([no])
      ])

    ac_run="$ac_run_save"
  fi
])

#
# AVX512
#
# AX_CHECK_AVX512
#   Registers the --disable-avx512 configure switch and, when AVX-512 has not
#   been turned off, runs a small AVX-512 test program through AC_RUN_IFELSE.
#   On success '-mavx512f' is appended to CXXFLAGS and HAVE_AVX512 is
#   substituted as 1; in every other case HAVE_AVX512 is substituted as 0.
#

AC_DEFUN([AX_CHECK_AVX512],
[
  AC_PROVIDE([AX_CHECK_AVX512])
  # Honor the value the user passed: the action-if-given branch also runs for
  # an explicit --enable-avx512, so it must not unconditionally disable the
  # check.
  AC_ARG_ENABLE([avx512],
                [AS_HELP_STRING([--disable-avx512],
                                [disable AVX512 support (default=no)])],
                [enable_avx512="$enableval"],
                [enable_avx512=yes])

  AC_SUBST([HAVE_AVX512], [0])

  if test "$enable_avx512" = "yes"; then
    AC_MSG_CHECKING([for AVX-512 support via '-mavx512f'])

    CXXFLAGS_temp="$CXXFLAGS -mavx512f"

    # ac_run is overridden only for this check and restored afterwards.
    # NOTE(review): confirm that AC_RUN_IFELSE actually consults ac_run - if it
    # does not then CXXFLAGS_temp never reaches the trial build.
    ac_run_save="$ac_run"
    ac_run="$CXX -o conftest$ac_ext $CXXFLAGS_temp conftest.$ac_ext>&5"
    AC_RUN_IFELSE([
      AC_LANG_PROGRAM([[
          #include <immintrin.h>]],
          [[
          __m512d x = _mm512_set1_pd(1.0);
          x = _mm512_add_pd(x, x);
          return _mm512_cvtsd_f64(x) != 2.0;]])],
      [
       CXXFLAGS="$CXXFLAGS -mavx512f"
       AC_SUBST([HAVE_AVX512], [1])
       AC_MSG_RESULT([yes])
      ], [
       AC_MSG_RESULT([no])
      ])

    ac_run="$ac_run_save"
  fi
])
Loading

0 comments on commit 168737b

Please sign in to comment.