diff --git a/include/micm/util/cuda_dense_matrix.hpp b/include/micm/util/cuda_dense_matrix.hpp index d36aca792..a44552787 100644 --- a/include/micm/util/cuda_dense_matrix.hpp +++ b/include/micm/util/cuda_dense_matrix.hpp @@ -13,9 +13,6 @@ #include -#define CHECK_CUDA_ERROR(err, msg) micm::cuda::CheckCudaError(err, __FILE__, __LINE__, msg) -#define CHECK_CUBLAS_ERROR(err, msg) micm::cuda::CheckCublasError(err, __FILE__, __LINE__, msg) - namespace micm { /** diff --git a/include/micm/util/cuda_matrix.cuh b/include/micm/util/cuda_matrix.cuh index e77c0b8ca..45adecc4c 100644 --- a/include/micm/util/cuda_matrix.cuh +++ b/include/micm/util/cuda_matrix.cuh @@ -12,6 +12,9 @@ #include #include +#define CHECK_CUDA_ERROR(err, msg) micm::cuda::CheckCudaError(err, __FILE__, __LINE__, msg) +#define CHECK_CUBLAS_ERROR(err, msg) micm::cuda::CheckCublasError(err, __FILE__, __LINE__, msg) + namespace micm { namespace cuda diff --git a/include/micm/util/cuda_sparse_matrix.hpp b/include/micm/util/cuda_sparse_matrix.hpp index bacc4352b..dda528833 100644 --- a/include/micm/util/cuda_sparse_matrix.hpp +++ b/include/micm/util/cuda_sparse_matrix.hpp @@ -13,8 +13,6 @@ #include -#define CHECK_CUDA_ERROR(err, msg) micm::cuda::CheckCudaError(err, __FILE__, __LINE__, msg) - namespace micm { template diff --git a/src/process/process_set.cu b/src/process/process_set.cu index c7070c576..cd9e86693 100644 --- a/src/process/process_set.cu +++ b/src/process/process_set.cu @@ -3,6 +3,7 @@ * SPDX-License-Identifier: Apache-2.0 */ #include +#include namespace micm { @@ -137,23 +138,23 @@ namespace micm ProcessSetParam devstruct; /// Allocate memory space on the device - cudaMalloc(&(devstruct.number_of_reactants_), number_of_reactants_bytes); - cudaMalloc(&(devstruct.reactant_ids_), reactant_ids_bytes); - cudaMalloc(&(devstruct.number_of_products_), number_of_products_bytes); - cudaMalloc(&(devstruct.product_ids_), product_ids_bytes); - cudaMalloc(&(devstruct.yields_), yields_bytes); + CHECK_CUDA_ERROR(cudaMalloc(&(devstruct.number_of_reactants_), number_of_reactants_bytes), "cudaMalloc"); + CHECK_CUDA_ERROR(cudaMalloc(&(devstruct.reactant_ids_), reactant_ids_bytes), "cudaMalloc"); + CHECK_CUDA_ERROR(cudaMalloc(&(devstruct.number_of_products_), number_of_products_bytes), "cudaMalloc"); + CHECK_CUDA_ERROR(cudaMalloc(&(devstruct.product_ids_), product_ids_bytes), "cudaMalloc"); + CHECK_CUDA_ERROR(cudaMalloc(&(devstruct.yields_), yields_bytes), "cudaMalloc"); /// Copy the data from host to device - cudaMemcpy( + CHECK_CUDA_ERROR(cudaMemcpy( devstruct.number_of_reactants_, hoststruct.number_of_reactants_, number_of_reactants_bytes, - cudaMemcpyHostToDevice); - cudaMemcpy(devstruct.reactant_ids_, hoststruct.reactant_ids_, reactant_ids_bytes, cudaMemcpyHostToDevice); - cudaMemcpy( - devstruct.number_of_products_, hoststruct.number_of_products_, number_of_products_bytes, cudaMemcpyHostToDevice); - cudaMemcpy(devstruct.product_ids_, hoststruct.product_ids_, product_ids_bytes, cudaMemcpyHostToDevice); - cudaMemcpy(devstruct.yields_, hoststruct.yields_, yields_bytes, cudaMemcpyHostToDevice); + cudaMemcpyHostToDevice), "cudaMemcpy"); + CHECK_CUDA_ERROR(cudaMemcpy(devstruct.reactant_ids_, hoststruct.reactant_ids_, reactant_ids_bytes, cudaMemcpyHostToDevice), "cudaMemcpy"); + CHECK_CUDA_ERROR(cudaMemcpy( + devstruct.number_of_products_, hoststruct.number_of_products_, number_of_products_bytes, cudaMemcpyHostToDevice), "cudaMemcpy"); + CHECK_CUDA_ERROR(cudaMemcpy(devstruct.product_ids_, hoststruct.product_ids_, product_ids_bytes, cudaMemcpyHostToDevice), "cudaMemcpy"); + CHECK_CUDA_ERROR(cudaMemcpy(devstruct.yields_, hoststruct.yields_, yields_bytes, cudaMemcpyHostToDevice), "cudaMemcpy"); devstruct.number_of_reactants_size_ = hoststruct.number_of_reactants_size_; devstruct.reactant_ids_size_ = hoststruct.reactant_ids_size_; @@ -173,11 +174,11 @@ namespace micm size_t jacobian_flat_ids_bytes = sizeof(size_t) * hoststruct.jacobian_flat_ids_size_; /// Allocate memory space on the device - cudaMalloc(&(devstruct.jacobian_flat_ids_), jacobian_flat_ids_bytes); + CHECK_CUDA_ERROR(cudaMalloc(&(devstruct.jacobian_flat_ids_), jacobian_flat_ids_bytes), "cudaMalloc"); /// Copy the data from host to device - cudaMemcpy( - devstruct.jacobian_flat_ids_, hoststruct.jacobian_flat_ids_, jacobian_flat_ids_bytes, cudaMemcpyHostToDevice); + CHECK_CUDA_ERROR(cudaMemcpy( + devstruct.jacobian_flat_ids_, hoststruct.jacobian_flat_ids_, jacobian_flat_ids_bytes, cudaMemcpyHostToDevice), "cudaMemcpy"); devstruct.jacobian_flat_ids_size_ = hoststruct.jacobian_flat_ids_size_; } @@ -186,14 +187,14 @@ namespace micm /// members of class "CudaProcessSet" on the device void FreeConstData(ProcessSetParam& devstruct) { - cudaFree(devstruct.number_of_reactants_); - cudaFree(devstruct.reactant_ids_); - cudaFree(devstruct.number_of_products_); - cudaFree(devstruct.product_ids_); - cudaFree(devstruct.yields_); + CHECK_CUDA_ERROR(cudaFree(devstruct.number_of_reactants_), "cudaFree"); + CHECK_CUDA_ERROR(cudaFree(devstruct.reactant_ids_), "cudaFree"); + CHECK_CUDA_ERROR(cudaFree(devstruct.number_of_products_), "cudaFree"); + CHECK_CUDA_ERROR(cudaFree(devstruct.product_ids_), "cudaFree"); + CHECK_CUDA_ERROR(cudaFree(devstruct.yields_), "cudaFree"); if (devstruct.jacobian_flat_ids_ != nullptr) { - cudaFree(devstruct.jacobian_flat_ids_); + CHECK_CUDA_ERROR(cudaFree(devstruct.jacobian_flat_ids_), "cudaFree"); } } diff --git a/src/solver/linear_solver.cu b/src/solver/linear_solver.cu index daa4998dc..8b0d994b6 100644 --- a/src/solver/linear_solver.cu +++ b/src/solver/linear_solver.cu @@ -3,6 +3,7 @@ * SPDX-License-Identifier: Apache-2.0 */ #include +#include #include @@ -92,16 +93,16 @@ namespace micm /// Create a struct whose members contain the addresses in the device memory. LinearSolverParam devstruct; - cudaMalloc(&(devstruct.nLij_Lii_), nLij_Lii_bytes); - cudaMalloc(&(devstruct.Lij_yj_), Lij_yj_bytes); - cudaMalloc(&(devstruct.nUij_Uii_), nUij_Uii_bytes); - cudaMalloc(&(devstruct.Uij_xj_), Uij_xj_bytes); + CHECK_CUDA_ERROR(cudaMalloc(&(devstruct.nLij_Lii_), nLij_Lii_bytes), "cudaMalloc"); + CHECK_CUDA_ERROR(cudaMalloc(&(devstruct.Lij_yj_), Lij_yj_bytes), "cudaMalloc"); + CHECK_CUDA_ERROR(cudaMalloc(&(devstruct.nUij_Uii_), nUij_Uii_bytes), "cudaMalloc"); + CHECK_CUDA_ERROR(cudaMalloc(&(devstruct.Uij_xj_), Uij_xj_bytes), "cudaMalloc"); /// Copy the data from host to device - cudaMemcpy(devstruct.nLij_Lii_, hoststruct.nLij_Lii_, nLij_Lii_bytes, cudaMemcpyHostToDevice); - cudaMemcpy(devstruct.Lij_yj_, hoststruct.Lij_yj_, Lij_yj_bytes, cudaMemcpyHostToDevice); - cudaMemcpy(devstruct.nUij_Uii_, hoststruct.nUij_Uii_, nUij_Uii_bytes, cudaMemcpyHostToDevice); - cudaMemcpy(devstruct.Uij_xj_, hoststruct.Uij_xj_, Uij_xj_bytes, cudaMemcpyHostToDevice); + CHECK_CUDA_ERROR(cudaMemcpy(devstruct.nLij_Lii_, hoststruct.nLij_Lii_, nLij_Lii_bytes, cudaMemcpyHostToDevice), "cudaMemcpy"); + CHECK_CUDA_ERROR(cudaMemcpy(devstruct.Lij_yj_, hoststruct.Lij_yj_, Lij_yj_bytes, cudaMemcpyHostToDevice), "cudaMemcpy"); + CHECK_CUDA_ERROR(cudaMemcpy(devstruct.nUij_Uii_, hoststruct.nUij_Uii_, nUij_Uii_bytes, cudaMemcpyHostToDevice), "cudaMemcpy"); + CHECK_CUDA_ERROR(cudaMemcpy(devstruct.Uij_xj_, hoststruct.Uij_xj_, Uij_xj_bytes, cudaMemcpyHostToDevice), "cudaMemcpy"); devstruct.nLij_Lii_size_ = hoststruct.nLij_Lii_size_; devstruct.Lij_yj_size_ = hoststruct.Lij_yj_size_; @@ -115,10 +116,10 @@ namespace micm /// members of class "CudaLinearSolver" on the device void FreeConstData(LinearSolverParam& devstruct) { - cudaFree(devstruct.nLij_Lii_); - cudaFree(devstruct.Lij_yj_); - cudaFree(devstruct.nUij_Uii_); - cudaFree(devstruct.Uij_xj_); + CHECK_CUDA_ERROR(cudaFree(devstruct.nLij_Lii_), "cudaFree"); + CHECK_CUDA_ERROR(cudaFree(devstruct.Lij_yj_), "cudaFree"); + CHECK_CUDA_ERROR(cudaFree(devstruct.nUij_Uii_), "cudaFree"); + CHECK_CUDA_ERROR(cudaFree(devstruct.Uij_xj_), "cudaFree"); } void SolveKernelDriver( diff --git a/src/solver/lu_decomposition.cu b/src/solver/lu_decomposition.cu index 13dd9f7a6..1218efeab 100644 --- a/src/solver/lu_decomposition.cu +++ b/src/solver/lu_decomposition.cu @@ -3,9 +3,7 @@ * SPDX-License-Identifier: Apache-2.0 */ #include - -#include -#include +#include namespace micm { @@ -121,28 +119,28 @@ namespace micm /// Create a struct whose members contain the addresses in the device memory. LuDecomposeParam devstruct; - cudaMalloc(&(devstruct.niLU_), niLU_bytes); - cudaMalloc(&(devstruct.do_aik_), do_aik_bytes); - cudaMalloc(&(devstruct.aik_), aik_bytes); - cudaMalloc(&(devstruct.uik_nkj_), uik_nkj_bytes); - cudaMalloc(&(devstruct.lij_ujk_), lij_ujk_bytes); - cudaMalloc(&(devstruct.do_aki_), do_aki_bytes); - cudaMalloc(&(devstruct.aki_), aki_bytes); - cudaMalloc(&(devstruct.lki_nkj_), lki_nkj_bytes); - cudaMalloc(&(devstruct.lkj_uji_), lkj_uji_bytes); + CHECK_CUDA_ERROR(cudaMalloc(&(devstruct.niLU_), niLU_bytes), "cudaMalloc"); + CHECK_CUDA_ERROR(cudaMalloc(&(devstruct.do_aik_), do_aik_bytes), "cudaMalloc"); + CHECK_CUDA_ERROR(cudaMalloc(&(devstruct.aik_), aik_bytes), "cudaMalloc"); + CHECK_CUDA_ERROR(cudaMalloc(&(devstruct.uik_nkj_), uik_nkj_bytes), "cudaMalloc"); + CHECK_CUDA_ERROR(cudaMalloc(&(devstruct.lij_ujk_), lij_ujk_bytes), "cudaMalloc"); + CHECK_CUDA_ERROR(cudaMalloc(&(devstruct.do_aki_), do_aki_bytes), "cudaMalloc"); + CHECK_CUDA_ERROR(cudaMalloc(&(devstruct.aki_), aki_bytes), "cudaMalloc"); + CHECK_CUDA_ERROR(cudaMalloc(&(devstruct.lki_nkj_), lki_nkj_bytes), "cudaMalloc"); + CHECK_CUDA_ERROR(cudaMalloc(&(devstruct.lkj_uji_), lkj_uji_bytes), "cudaMalloc"); cudaMalloc(&(devstruct.uii_), uii_bytes); /// Copy the data from host to device - cudaMemcpy(devstruct.niLU_, hoststruct.niLU_, niLU_bytes, cudaMemcpyHostToDevice); - cudaMemcpy(devstruct.do_aik_, hoststruct.do_aik_, do_aik_bytes, cudaMemcpyHostToDevice); - cudaMemcpy(devstruct.aik_, hoststruct.aik_, aik_bytes, cudaMemcpyHostToDevice); - cudaMemcpy(devstruct.uik_nkj_, hoststruct.uik_nkj_, uik_nkj_bytes, cudaMemcpyHostToDevice); - cudaMemcpy(devstruct.lij_ujk_, hoststruct.lij_ujk_, lij_ujk_bytes, cudaMemcpyHostToDevice); - cudaMemcpy(devstruct.do_aki_, hoststruct.do_aki_, do_aki_bytes, cudaMemcpyHostToDevice); - cudaMemcpy(devstruct.aki_, hoststruct.aki_, aki_bytes, cudaMemcpyHostToDevice); - cudaMemcpy(devstruct.lki_nkj_, hoststruct.lki_nkj_, lki_nkj_bytes, cudaMemcpyHostToDevice); - cudaMemcpy(devstruct.lkj_uji_, hoststruct.lkj_uji_, lkj_uji_bytes, cudaMemcpyHostToDevice); - cudaMemcpy(devstruct.uii_, hoststruct.uii_, uii_bytes, cudaMemcpyHostToDevice); + CHECK_CUDA_ERROR(cudaMemcpy(devstruct.niLU_, hoststruct.niLU_, niLU_bytes, cudaMemcpyHostToDevice), "cudaMemcpy"); + CHECK_CUDA_ERROR(cudaMemcpy(devstruct.do_aik_, hoststruct.do_aik_, do_aik_bytes, cudaMemcpyHostToDevice), "cudaMemcpy"); + CHECK_CUDA_ERROR(cudaMemcpy(devstruct.aik_, hoststruct.aik_, aik_bytes, cudaMemcpyHostToDevice), "cudaMemcpy"); + CHECK_CUDA_ERROR(cudaMemcpy(devstruct.uik_nkj_, hoststruct.uik_nkj_, uik_nkj_bytes, cudaMemcpyHostToDevice), "cudaMemcpy"); + CHECK_CUDA_ERROR(cudaMemcpy(devstruct.lij_ujk_, hoststruct.lij_ujk_, lij_ujk_bytes, cudaMemcpyHostToDevice), "cudaMemcpy"); + CHECK_CUDA_ERROR(cudaMemcpy(devstruct.do_aki_, hoststruct.do_aki_, do_aki_bytes, cudaMemcpyHostToDevice), "cudaMemcpy"); + CHECK_CUDA_ERROR(cudaMemcpy(devstruct.aki_, hoststruct.aki_, aki_bytes, cudaMemcpyHostToDevice), "cudaMemcpy"); + CHECK_CUDA_ERROR(cudaMemcpy(devstruct.lki_nkj_, hoststruct.lki_nkj_, lki_nkj_bytes, cudaMemcpyHostToDevice), "cudaMemcpy"); + CHECK_CUDA_ERROR(cudaMemcpy(devstruct.lkj_uji_, hoststruct.lkj_uji_, lkj_uji_bytes, cudaMemcpyHostToDevice), "cudaMemcpy"); + CHECK_CUDA_ERROR(cudaMemcpy(devstruct.uii_, hoststruct.uii_, uii_bytes, cudaMemcpyHostToDevice), "cudaMemcpy"); devstruct.niLU_size_ = hoststruct.niLU_size_; return devstruct; @@ -152,16 +150,16 @@ namespace micm /// members of class "CudaLuDecomposition" on the device void FreeConstData(LuDecomposeParam& devstruct) { - cudaFree(devstruct.niLU_); - cudaFree(devstruct.do_aik_); - cudaFree(devstruct.aik_); - cudaFree(devstruct.uik_nkj_); - cudaFree(devstruct.lij_ujk_); - cudaFree(devstruct.do_aki_); - cudaFree(devstruct.aki_); - cudaFree(devstruct.lki_nkj_); - cudaFree(devstruct.lkj_uji_); - cudaFree(devstruct.uii_); + CHECK_CUDA_ERROR(cudaFree(devstruct.niLU_), "cudaFree"); + CHECK_CUDA_ERROR(cudaFree(devstruct.do_aik_), "cudaFree"); + CHECK_CUDA_ERROR(cudaFree(devstruct.aik_), "cudaFree"); + CHECK_CUDA_ERROR(cudaFree(devstruct.uik_nkj_), "cudaFree"); + CHECK_CUDA_ERROR(cudaFree(devstruct.lij_ujk_), "cudaFree"); + CHECK_CUDA_ERROR(cudaFree(devstruct.do_aki_), "cudaFree"); + CHECK_CUDA_ERROR(cudaFree(devstruct.aki_), "cudaFree"); + CHECK_CUDA_ERROR(cudaFree(devstruct.lki_nkj_), "cudaFree"); + CHECK_CUDA_ERROR(cudaFree(devstruct.lkj_uji_), "cudaFree"); + CHECK_CUDA_ERROR(cudaFree(devstruct.uii_), "cudaFree"); } void DecomposeKernelDriver( diff --git a/src/solver/rosenbrock.cu b/src/solver/rosenbrock.cu index 3b067a545..350909ea6 100644 --- a/src/solver/rosenbrock.cu +++ b/src/solver/rosenbrock.cu @@ -5,11 +5,9 @@ #include #include #include - +#include #include -#include - namespace micm { namespace cuda @@ -48,19 +46,19 @@ namespace micm /// Create a struct whose members contain the addresses in the device memory. CudaRosenbrockSolverParam devstruct; - cudaMalloc(&(devstruct.errors_input_), errors_bytes); - cudaMalloc(&(devstruct.errors_output_), errors_bytes); - cudaMalloc(&(devstruct.jacobian_diagonal_elements_), jacobian_diagonal_elements_bytes); - cudaMalloc(&(devstruct.absolute_tolerance_), tolerance_bytes); + CHECK_CUDA_ERROR(cudaMalloc(&(devstruct.errors_input_), errors_bytes), "cudaMalloc"); + CHECK_CUDA_ERROR(cudaMalloc(&(devstruct.errors_output_), errors_bytes), "cudaMalloc"); + CHECK_CUDA_ERROR(cudaMalloc(&(devstruct.jacobian_diagonal_elements_), jacobian_diagonal_elements_bytes), "cudaMalloc"); + CHECK_CUDA_ERROR(cudaMalloc(&(devstruct.absolute_tolerance_), tolerance_bytes), "cudaMalloc"); /// Copy the data from host to device - cudaMemcpy( + CHECK_CUDA_ERROR(cudaMemcpy( devstruct.jacobian_diagonal_elements_, hoststruct.jacobian_diagonal_elements_, jacobian_diagonal_elements_bytes, - cudaMemcpyHostToDevice); + cudaMemcpyHostToDevice), "cudaMemcpy"); - cudaMemcpy(devstruct.absolute_tolerance_, hoststruct.absolute_tolerance_, tolerance_bytes, cudaMemcpyHostToDevice); + CHECK_CUDA_ERROR(cudaMemcpy(devstruct.absolute_tolerance_, hoststruct.absolute_tolerance_, tolerance_bytes, cudaMemcpyHostToDevice), "cudaMemcpy"); devstruct.errors_size_ = hoststruct.errors_size_; devstruct.jacobian_diagonal_elements_size_ = hoststruct.jacobian_diagonal_elements_size_; @@ -73,10 +71,10 @@ namespace micm /// members and temporary variables of class "CudaLuDecomposition" on the device void FreeConstData(CudaRosenbrockSolverParam& devstruct) { - cudaFree(devstruct.errors_input_); - cudaFree(devstruct.errors_output_); - cudaFree(devstruct.jacobian_diagonal_elements_); - cudaFree(devstruct.absolute_tolerance_); + CHECK_CUDA_ERROR(cudaFree(devstruct.errors_input_), "cudaFree"); + CHECK_CUDA_ERROR(cudaFree(devstruct.errors_output_), "cudaFree"); + CHECK_CUDA_ERROR(cudaFree(devstruct.jacobian_diagonal_elements_), "cudaFree"); + CHECK_CUDA_ERROR(cudaFree(devstruct.absolute_tolerance_), "cudaFree"); } // Specific CUDA device function to do reduction within a warp @@ -308,7 +306,7 @@ namespace micm } cudaDeviceSynchronize(); - cudaMemcpy(&normalized_error, &devstruct.errors_output_[0], sizeof(double), cudaMemcpyDeviceToHost); + CHECK_CUDA_ERROR(cudaMemcpy(&normalized_error, &devstruct.errors_output_[0], sizeof(double), cudaMemcpyDeviceToHost), "cudaMemcpy"); normalized_error = std::sqrt(normalized_error / number_of_elements); } // end of if-else for CUDA/CUBLAS implementation return std::max(normalized_error, 1.0e-10); diff --git a/src/util/cuda_matrix.cu b/src/util/cuda_matrix.cu index 90c3749d9..7414652ae 100644 --- a/src/util/cuda_matrix.cu +++ b/src/util/cuda_matrix.cu @@ -4,10 +4,7 @@ */ #include #include - #include - -#include #include namespace micm