Skip to content

Commit

Permalink
add error checks for all the CUDA APIs
Browse files Browse the repository at this point in the history
  • Loading branch information
sjsprecious committed Jun 5, 2024
1 parent 601a3fc commit b9bfca3
Show file tree
Hide file tree
Showing 8 changed files with 81 additions and 88 deletions.
3 changes: 0 additions & 3 deletions include/micm/util/cuda_dense_matrix.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,6 @@

#include <type_traits>

#define CHECK_CUDA_ERROR(err, msg) micm::cuda::CheckCudaError(err, __FILE__, __LINE__, msg)
#define CHECK_CUBLAS_ERROR(err, msg) micm::cuda::CheckCublasError(err, __FILE__, __LINE__, msg)

namespace micm
{
/**
Expand Down
3 changes: 3 additions & 0 deletions include/micm/util/cuda_matrix.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@
#include <string>
#include <vector>

#define CHECK_CUDA_ERROR(err, msg) micm::cuda::CheckCudaError(err, __FILE__, __LINE__, msg)
#define CHECK_CUBLAS_ERROR(err, msg) micm::cuda::CheckCublasError(err, __FILE__, __LINE__, msg)

namespace micm
{
namespace cuda
Expand Down
2 changes: 0 additions & 2 deletions include/micm/util/cuda_sparse_matrix.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@

#include <type_traits>

#define CHECK_CUDA_ERROR(err, msg) micm::cuda::CheckCudaError(err, __FILE__, __LINE__, msg)

namespace micm
{
template<class T, class OrderingPolicy>
Expand Down
43 changes: 22 additions & 21 deletions src/process/process_set.cu
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
* SPDX-License-Identifier: Apache-2.0
*/
#include <micm/util/cuda_param.hpp>
#include <micm/util/cuda_matrix.cuh>

namespace micm
{
Expand Down Expand Up @@ -137,23 +138,23 @@ namespace micm
ProcessSetParam devstruct;

/// Allocate memory space on the device
cudaMalloc(&(devstruct.number_of_reactants_), number_of_reactants_bytes);
cudaMalloc(&(devstruct.reactant_ids_), reactant_ids_bytes);
cudaMalloc(&(devstruct.number_of_products_), number_of_products_bytes);
cudaMalloc(&(devstruct.product_ids_), product_ids_bytes);
cudaMalloc(&(devstruct.yields_), yields_bytes);
CHECK_CUDA_ERROR(cudaMalloc(&(devstruct.number_of_reactants_), number_of_reactants_bytes), "cudaMalloc");
CHECK_CUDA_ERROR(cudaMalloc(&(devstruct.reactant_ids_), reactant_ids_bytes), "cudaMalloc");
CHECK_CUDA_ERROR(cudaMalloc(&(devstruct.number_of_products_), number_of_products_bytes), "cudaMalloc");
CHECK_CUDA_ERROR(cudaMalloc(&(devstruct.product_ids_), product_ids_bytes), "cudaMalloc");
CHECK_CUDA_ERROR(cudaMalloc(&(devstruct.yields_), yields_bytes), "cudaMalloc");

/// Copy the data from host to device
cudaMemcpy(
CHECK_CUDA_ERROR(cudaMemcpy(
devstruct.number_of_reactants_,
hoststruct.number_of_reactants_,
number_of_reactants_bytes,
cudaMemcpyHostToDevice);
cudaMemcpy(devstruct.reactant_ids_, hoststruct.reactant_ids_, reactant_ids_bytes, cudaMemcpyHostToDevice);
cudaMemcpy(
devstruct.number_of_products_, hoststruct.number_of_products_, number_of_products_bytes, cudaMemcpyHostToDevice);
cudaMemcpy(devstruct.product_ids_, hoststruct.product_ids_, product_ids_bytes, cudaMemcpyHostToDevice);
cudaMemcpy(devstruct.yields_, hoststruct.yields_, yields_bytes, cudaMemcpyHostToDevice);
cudaMemcpyHostToDevice), "cudaMemcpy");
CHECK_CUDA_ERROR(cudaMemcpy(devstruct.reactant_ids_, hoststruct.reactant_ids_, reactant_ids_bytes, cudaMemcpyHostToDevice), "cudaMemcpy");
CHECK_CUDA_ERROR(cudaMemcpy(
devstruct.number_of_products_, hoststruct.number_of_products_, number_of_products_bytes, cudaMemcpyHostToDevice), "cudaMemcpy");
CHECK_CUDA_ERROR(cudaMemcpy(devstruct.product_ids_, hoststruct.product_ids_, product_ids_bytes, cudaMemcpyHostToDevice), "cudaMemcpy");
CHECK_CUDA_ERROR(cudaMemcpy(devstruct.yields_, hoststruct.yields_, yields_bytes, cudaMemcpyHostToDevice), "cudaMemcpy");

devstruct.number_of_reactants_size_ = hoststruct.number_of_reactants_size_;
devstruct.reactant_ids_size_ = hoststruct.reactant_ids_size_;
Expand All @@ -173,11 +174,11 @@ namespace micm
size_t jacobian_flat_ids_bytes = sizeof(size_t) * hoststruct.jacobian_flat_ids_size_;

/// Allocate memory space on the device
cudaMalloc(&(devstruct.jacobian_flat_ids_), jacobian_flat_ids_bytes);
CHECK_CUDA_ERROR(cudaMalloc(&(devstruct.jacobian_flat_ids_), jacobian_flat_ids_bytes), "cudaMalloc");

/// Copy the data from host to device
cudaMemcpy(
devstruct.jacobian_flat_ids_, hoststruct.jacobian_flat_ids_, jacobian_flat_ids_bytes, cudaMemcpyHostToDevice);
CHECK_CUDA_ERROR(cudaMemcpy(
devstruct.jacobian_flat_ids_, hoststruct.jacobian_flat_ids_, jacobian_flat_ids_bytes, cudaMemcpyHostToDevice), "cudaMemcpy");

devstruct.jacobian_flat_ids_size_ = hoststruct.jacobian_flat_ids_size_;
}
Expand All @@ -186,14 +187,14 @@ namespace micm
/// members of class "CudaProcessSet" on the device
void FreeConstData(ProcessSetParam& devstruct)
{
cudaFree(devstruct.number_of_reactants_);
cudaFree(devstruct.reactant_ids_);
cudaFree(devstruct.number_of_products_);
cudaFree(devstruct.product_ids_);
cudaFree(devstruct.yields_);
CHECK_CUDA_ERROR(cudaFree(devstruct.number_of_reactants_), "cudaFree");
CHECK_CUDA_ERROR(cudaFree(devstruct.reactant_ids_), "cudaFree");
CHECK_CUDA_ERROR(cudaFree(devstruct.number_of_products_), "cudaFree");
CHECK_CUDA_ERROR(cudaFree(devstruct.product_ids_), "cudaFree");
CHECK_CUDA_ERROR(cudaFree(devstruct.yields_), "cudaFree");
if (devstruct.jacobian_flat_ids_ != nullptr)
{
cudaFree(devstruct.jacobian_flat_ids_);
CHECK_CUDA_ERROR(cudaFree(devstruct.jacobian_flat_ids_), "cudaFree");
}
}

Expand Down
25 changes: 13 additions & 12 deletions src/solver/linear_solver.cu
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
* SPDX-License-Identifier: Apache-2.0
*/
#include <micm/util/cuda_param.hpp>
#include <micm/util/cuda_matrix.cuh>

#include <chrono>

Expand Down Expand Up @@ -92,16 +93,16 @@ namespace micm

/// Create a struct whose members contain the addresses in the device memory.
LinearSolverParam devstruct;
cudaMalloc(&(devstruct.nLij_Lii_), nLij_Lii_bytes);
cudaMalloc(&(devstruct.Lij_yj_), Lij_yj_bytes);
cudaMalloc(&(devstruct.nUij_Uii_), nUij_Uii_bytes);
cudaMalloc(&(devstruct.Uij_xj_), Uij_xj_bytes);
CHECK_CUDA_ERROR(cudaMalloc(&(devstruct.nLij_Lii_), nLij_Lii_bytes), "cudaMalloc");
CHECK_CUDA_ERROR(cudaMalloc(&(devstruct.Lij_yj_), Lij_yj_bytes), "cudaMalloc");
CHECK_CUDA_ERROR(cudaMalloc(&(devstruct.nUij_Uii_), nUij_Uii_bytes), "cudaMalloc");
CHECK_CUDA_ERROR(cudaMalloc(&(devstruct.Uij_xj_), Uij_xj_bytes), "cudaMalloc");

/// Copy the data from host to device
cudaMemcpy(devstruct.nLij_Lii_, hoststruct.nLij_Lii_, nLij_Lii_bytes, cudaMemcpyHostToDevice);
cudaMemcpy(devstruct.Lij_yj_, hoststruct.Lij_yj_, Lij_yj_bytes, cudaMemcpyHostToDevice);
cudaMemcpy(devstruct.nUij_Uii_, hoststruct.nUij_Uii_, nUij_Uii_bytes, cudaMemcpyHostToDevice);
cudaMemcpy(devstruct.Uij_xj_, hoststruct.Uij_xj_, Uij_xj_bytes, cudaMemcpyHostToDevice);
CHECK_CUDA_ERROR(cudaMemcpy(devstruct.nLij_Lii_, hoststruct.nLij_Lii_, nLij_Lii_bytes, cudaMemcpyHostToDevice), "cudaMemcpy");
CHECK_CUDA_ERROR(cudaMemcpy(devstruct.Lij_yj_, hoststruct.Lij_yj_, Lij_yj_bytes, cudaMemcpyHostToDevice), "cudaMemcpy");
CHECK_CUDA_ERROR(cudaMemcpy(devstruct.nUij_Uii_, hoststruct.nUij_Uii_, nUij_Uii_bytes, cudaMemcpyHostToDevice), "cudaMemcpy");
CHECK_CUDA_ERROR(cudaMemcpy(devstruct.Uij_xj_, hoststruct.Uij_xj_, Uij_xj_bytes, cudaMemcpyHostToDevice), "cudaMemcpy");

devstruct.nLij_Lii_size_ = hoststruct.nLij_Lii_size_;
devstruct.Lij_yj_size_ = hoststruct.Lij_yj_size_;
Expand All @@ -115,10 +116,10 @@ namespace micm
/// members of class "CudaLinearSolver" on the device
void FreeConstData(LinearSolverParam& devstruct)
{
cudaFree(devstruct.nLij_Lii_);
cudaFree(devstruct.Lij_yj_);
cudaFree(devstruct.nUij_Uii_);
cudaFree(devstruct.Uij_xj_);
CHECK_CUDA_ERROR(cudaFree(devstruct.nLij_Lii_), "cudaFree");
CHECK_CUDA_ERROR(cudaFree(devstruct.Lij_yj_), "cudaFree");
CHECK_CUDA_ERROR(cudaFree(devstruct.nUij_Uii_), "cudaFree");
CHECK_CUDA_ERROR(cudaFree(devstruct.Uij_xj_), "cudaFree");
}

void SolveKernelDriver(
Expand Down
62 changes: 30 additions & 32 deletions src/solver/lu_decomposition.cu
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,7 @@
* SPDX-License-Identifier: Apache-2.0
*/
#include <micm/util/cuda_param.hpp>

#include <chrono>
#include <iostream>
#include <micm/util/cuda_matrix.cuh>

namespace micm
{
Expand Down Expand Up @@ -121,28 +119,28 @@ namespace micm

/// Create a struct whose members contain the addresses in the device memory.
LuDecomposeParam devstruct;
cudaMalloc(&(devstruct.niLU_), niLU_bytes);
cudaMalloc(&(devstruct.do_aik_), do_aik_bytes);
cudaMalloc(&(devstruct.aik_), aik_bytes);
cudaMalloc(&(devstruct.uik_nkj_), uik_nkj_bytes);
cudaMalloc(&(devstruct.lij_ujk_), lij_ujk_bytes);
cudaMalloc(&(devstruct.do_aki_), do_aki_bytes);
cudaMalloc(&(devstruct.aki_), aki_bytes);
cudaMalloc(&(devstruct.lki_nkj_), lki_nkj_bytes);
cudaMalloc(&(devstruct.lkj_uji_), lkj_uji_bytes);
CHECK_CUDA_ERROR(cudaMalloc(&(devstruct.niLU_), niLU_bytes), "cudaMalloc");
CHECK_CUDA_ERROR(cudaMalloc(&(devstruct.do_aik_), do_aik_bytes), "cudaMalloc");
CHECK_CUDA_ERROR(cudaMalloc(&(devstruct.aik_), aik_bytes), "cudaMalloc");
CHECK_CUDA_ERROR(cudaMalloc(&(devstruct.uik_nkj_), uik_nkj_bytes), "cudaMalloc");
CHECK_CUDA_ERROR(cudaMalloc(&(devstruct.lij_ujk_), lij_ujk_bytes), "cudaMalloc");
CHECK_CUDA_ERROR(cudaMalloc(&(devstruct.do_aki_), do_aki_bytes), "cudaMalloc");
CHECK_CUDA_ERROR(cudaMalloc(&(devstruct.aki_), aki_bytes), "cudaMalloc");
CHECK_CUDA_ERROR(cudaMalloc(&(devstruct.lki_nkj_), lki_nkj_bytes), "cudaMalloc");
CHECK_CUDA_ERROR(cudaMalloc(&(devstruct.lkj_uji_), lkj_uji_bytes), "cudaMalloc");
cudaMalloc(&(devstruct.uii_), uii_bytes);

/// Copy the data from host to device
cudaMemcpy(devstruct.niLU_, hoststruct.niLU_, niLU_bytes, cudaMemcpyHostToDevice);
cudaMemcpy(devstruct.do_aik_, hoststruct.do_aik_, do_aik_bytes, cudaMemcpyHostToDevice);
cudaMemcpy(devstruct.aik_, hoststruct.aik_, aik_bytes, cudaMemcpyHostToDevice);
cudaMemcpy(devstruct.uik_nkj_, hoststruct.uik_nkj_, uik_nkj_bytes, cudaMemcpyHostToDevice);
cudaMemcpy(devstruct.lij_ujk_, hoststruct.lij_ujk_, lij_ujk_bytes, cudaMemcpyHostToDevice);
cudaMemcpy(devstruct.do_aki_, hoststruct.do_aki_, do_aki_bytes, cudaMemcpyHostToDevice);
cudaMemcpy(devstruct.aki_, hoststruct.aki_, aki_bytes, cudaMemcpyHostToDevice);
cudaMemcpy(devstruct.lki_nkj_, hoststruct.lki_nkj_, lki_nkj_bytes, cudaMemcpyHostToDevice);
cudaMemcpy(devstruct.lkj_uji_, hoststruct.lkj_uji_, lkj_uji_bytes, cudaMemcpyHostToDevice);
cudaMemcpy(devstruct.uii_, hoststruct.uii_, uii_bytes, cudaMemcpyHostToDevice);
CHECK_CUDA_ERROR(cudaMemcpy(devstruct.niLU_, hoststruct.niLU_, niLU_bytes, cudaMemcpyHostToDevice), "cudaMemcpy");
CHECK_CUDA_ERROR(cudaMemcpy(devstruct.do_aik_, hoststruct.do_aik_, do_aik_bytes, cudaMemcpyHostToDevice), "cudaMemcpy");
CHECK_CUDA_ERROR(cudaMemcpy(devstruct.aik_, hoststruct.aik_, aik_bytes, cudaMemcpyHostToDevice), "cudaMemcpy");
CHECK_CUDA_ERROR(cudaMemcpy(devstruct.uik_nkj_, hoststruct.uik_nkj_, uik_nkj_bytes, cudaMemcpyHostToDevice), "cudaMemcpy");
CHECK_CUDA_ERROR(cudaMemcpy(devstruct.lij_ujk_, hoststruct.lij_ujk_, lij_ujk_bytes, cudaMemcpyHostToDevice), "cudaMemcpy");
CHECK_CUDA_ERROR(cudaMemcpy(devstruct.do_aki_, hoststruct.do_aki_, do_aki_bytes, cudaMemcpyHostToDevice), "cudaMemcpy");
CHECK_CUDA_ERROR(cudaMemcpy(devstruct.aki_, hoststruct.aki_, aki_bytes, cudaMemcpyHostToDevice), "cudaMemcpy");
CHECK_CUDA_ERROR(cudaMemcpy(devstruct.lki_nkj_, hoststruct.lki_nkj_, lki_nkj_bytes, cudaMemcpyHostToDevice), "cudaMemcpy");
CHECK_CUDA_ERROR(cudaMemcpy(devstruct.lkj_uji_, hoststruct.lkj_uji_, lkj_uji_bytes, cudaMemcpyHostToDevice), "cudaMemcpy");
CHECK_CUDA_ERROR(cudaMemcpy(devstruct.uii_, hoststruct.uii_, uii_bytes, cudaMemcpyHostToDevice), "cudaMemcpy");
devstruct.niLU_size_ = hoststruct.niLU_size_;

return devstruct;
Expand All @@ -152,16 +150,16 @@ namespace micm
/// members of class "CudaLuDecomposition" on the device
void FreeConstData(LuDecomposeParam& devstruct)
{
cudaFree(devstruct.niLU_);
cudaFree(devstruct.do_aik_);
cudaFree(devstruct.aik_);
cudaFree(devstruct.uik_nkj_);
cudaFree(devstruct.lij_ujk_);
cudaFree(devstruct.do_aki_);
cudaFree(devstruct.aki_);
cudaFree(devstruct.lki_nkj_);
cudaFree(devstruct.lkj_uji_);
cudaFree(devstruct.uii_);
CHECK_CUDA_ERROR(cudaFree(devstruct.niLU_), "cudaFree");
CHECK_CUDA_ERROR(cudaFree(devstruct.do_aik_), "cudaFree");
CHECK_CUDA_ERROR(cudaFree(devstruct.aik_), "cudaFree");
CHECK_CUDA_ERROR(cudaFree(devstruct.uik_nkj_), "cudaFree");
CHECK_CUDA_ERROR(cudaFree(devstruct.lij_ujk_), "cudaFree");
CHECK_CUDA_ERROR(cudaFree(devstruct.do_aki_), "cudaFree");
CHECK_CUDA_ERROR(cudaFree(devstruct.aki_), "cudaFree");
CHECK_CUDA_ERROR(cudaFree(devstruct.lki_nkj_), "cudaFree");
CHECK_CUDA_ERROR(cudaFree(devstruct.lkj_uji_), "cudaFree");
CHECK_CUDA_ERROR(cudaFree(devstruct.uii_), "cudaFree");
}

void DecomposeKernelDriver(
Expand Down
28 changes: 13 additions & 15 deletions src/solver/rosenbrock.cu
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,9 @@
#include <micm/solver/rosenbrock_solver_parameters.hpp>
#include <micm/util/cuda_param.hpp>
#include <micm/util/internal_error.hpp>

#include <micm/util/cuda_matrix.cuh>
#include <cublas_v2.h>

#include <iostream>

namespace micm
{
namespace cuda
Expand Down Expand Up @@ -48,19 +46,19 @@ namespace micm

/// Create a struct whose members contain the addresses in the device memory.
CudaRosenbrockSolverParam devstruct;
cudaMalloc(&(devstruct.errors_input_), errors_bytes);
cudaMalloc(&(devstruct.errors_output_), errors_bytes);
cudaMalloc(&(devstruct.jacobian_diagonal_elements_), jacobian_diagonal_elements_bytes);
cudaMalloc(&(devstruct.absolute_tolerance_), tolerance_bytes);
CHECK_CUDA_ERROR(cudaMalloc(&(devstruct.errors_input_), errors_bytes), "cudaMalloc");
CHECK_CUDA_ERROR(cudaMalloc(&(devstruct.errors_output_), errors_bytes), "cudaMalloc");
CHECK_CUDA_ERROR(cudaMalloc(&(devstruct.jacobian_diagonal_elements_), jacobian_diagonal_elements_bytes), "cudaMalloc");
CHECK_CUDA_ERROR(cudaMalloc(&(devstruct.absolute_tolerance_), tolerance_bytes), "cudaMalloc");

/// Copy the data from host to device
cudaMemcpy(
CHECK_CUDA_ERROR(cudaMemcpy(
devstruct.jacobian_diagonal_elements_,
hoststruct.jacobian_diagonal_elements_,
jacobian_diagonal_elements_bytes,
cudaMemcpyHostToDevice);
cudaMemcpyHostToDevice), "cudaMemcpy");

cudaMemcpy(devstruct.absolute_tolerance_, hoststruct.absolute_tolerance_, tolerance_bytes, cudaMemcpyHostToDevice);
CHECK_CUDA_ERROR(cudaMemcpy(devstruct.absolute_tolerance_, hoststruct.absolute_tolerance_, tolerance_bytes, cudaMemcpyHostToDevice), "cudaMemcpy");

devstruct.errors_size_ = hoststruct.errors_size_;
devstruct.jacobian_diagonal_elements_size_ = hoststruct.jacobian_diagonal_elements_size_;
Expand All @@ -73,10 +71,10 @@ namespace micm
/// members and temporary variables of class "CudaLuDecomposition" on the device
void FreeConstData(CudaRosenbrockSolverParam& devstruct)
{
cudaFree(devstruct.errors_input_);
cudaFree(devstruct.errors_output_);
cudaFree(devstruct.jacobian_diagonal_elements_);
cudaFree(devstruct.absolute_tolerance_);
CHECK_CUDA_ERROR(cudaFree(devstruct.errors_input_), "cudaFree");
CHECK_CUDA_ERROR(cudaFree(devstruct.errors_output_), "cudaFree");
CHECK_CUDA_ERROR(cudaFree(devstruct.jacobian_diagonal_elements_), "cudaFree");
CHECK_CUDA_ERROR(cudaFree(devstruct.absolute_tolerance_), "cudaFree");
}

// Specific CUDA device function to do reduction within a warp
Expand Down Expand Up @@ -308,7 +306,7 @@ namespace micm
}
cudaDeviceSynchronize();

cudaMemcpy(&normalized_error, &devstruct.errors_output_[0], sizeof(double), cudaMemcpyDeviceToHost);
CHECK_CUDA_ERROR(cudaMemcpy(&normalized_error, &devstruct.errors_output_[0], sizeof(double), cudaMemcpyDeviceToHost), "cudaMemcpy");
normalized_error = std::sqrt(normalized_error / number_of_elements);
} // end of if-else for CUDA/CUBLAS implementation
return std::max(normalized_error, 1.0e-10);
Expand Down
3 changes: 0 additions & 3 deletions src/util/cuda_matrix.cu
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,7 @@
*/
#include <micm/util/cuda_matrix.cuh>
#include <micm/util/internal_error.hpp>

#include <cuda_runtime.h>

#include <iostream>
#include <vector>

namespace micm
Expand Down

0 comments on commit b9bfca3

Please sign in to comment.