Patch notes:
  * src/process/process_set.cu: drop the hand-written atomicSub(double*, double)
    helper and subtract the reactant rate by atomically adding the negated rate
    through the atomicAdd(double*, double) helper that the file keeps.
  * test/unit/process/test_cuda_process_set.cpp: compare forcing values with the
    two-argument EXPECT_DOUBLE_EQ instead of EXPECT_NEAR with a 1e-15 tolerance.
  NOTE(review): indentation of the hunks was reconstructed; apply with
  whitespace-insensitive matching if needed. The post-image blob ids on the
  "index" lines predate the edits to the added lines and are informational only.

diff --git a/src/process/process_set.cu b/src/process/process_set.cu
index 6fbf36ba5..7093c50a9 100644
--- a/src/process/process_set.cu
+++ b/src/process/process_set.cu
@@ -9,25 +9,6 @@ namespace micm {
 
     //one thread per reaction
     //passing all device pointers
-
-__device__ double atomicSub(double* address, double val)
-{
-    unsigned long long int* address_as_ull =
-                              (unsigned long long int*)address;
-    unsigned long long int old = *address_as_ull, assumed;
-
-    do {
-        assumed = old;
-        old = atomicCAS(address_as_ull, assumed,
-                        __double_as_longlong(val -
-                               __longlong_as_double(assumed)));
-
-    // Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN)
-    } while (assumed != old);
-
-    return __longlong_as_double(old);
-}
-
 __device__ double atomicAdd(double* address, double val)
 {
     unsigned long long int* address_as_ull =
@@ -87,7 +68,7 @@ __device__ double atomicAdd(double* address, double val)
         for (int i_reactant = 0; i_reactant < reactant_num; i_reactant++){
           int reactant_ids_index = initial_reactant_ids_index + i_reactant;
           int state_forcing_col_index = reactant_ids_[reactant_ids_index];
-          atomicSub(&forcing[row_index * state_forcing_columns + state_forcing_col_index], rate);
+          atomicAdd(&forcing[row_index * state_forcing_columns + state_forcing_col_index], -rate);
         }
 
         for (int i_product = 0; i_product < product_num; i_product++){
@@ -211,6 +192,7 @@ __device__ double atomicAdd(double* address, double val)
 
         cudaMemcpy(forcing_data, d_forcing, state_forcing_bytes, cudaMemcpyDeviceToHost);
 
+        //clean up
         cudaFree(d_rate_constants);
         cudaFree(d_state_variables);
         cudaFree(d_forcing);
diff --git a/test/unit/process/test_cuda_process_set.cpp b/test/unit/process/test_cuda_process_set.cpp
index 1d31d1298..30b567dd0 100644
--- a/test/unit/process/test_cuda_process_set.cpp
+++ b/test/unit/process/test_cuda_process_set.cpp
@@ -76,16 +76,16 @@ TEST(ProcessSet, Constructor)
       state.variables_,
       forcing);
 
-  EXPECT_NEAR(forcing[0][0], 1000.0 - 10.0 * 0.1 * 0.3 + 20.0 * 0.2, 1e-15);
-  EXPECT_NEAR(forcing[1][0], 1000.0 - 110.0 * 1.1 * 1.3 + 120.0 * 1.2, 1e-15);
-  EXPECT_NEAR(forcing[0][1], 1000.0 + 10.0 * 0.1 * 0.3 - 20.0 * 0.2, 1e-15);
-  EXPECT_NEAR(forcing[1][1], 1000.0 + 110.0 * 1.1 * 1.3 - 120.0 * 1.2,1e-15);
-  EXPECT_NEAR(forcing[0][2], 1000.0 - 10.0 * 0.1 * 0.3, 1e-15);
-  EXPECT_NEAR(forcing[1][2], 1000.0 - 110.0 * 1.1 * 1.3, 1e-15);
-  EXPECT_NEAR(forcing[0][3], 1000.0 + 20.0 * 0.2 * 1.4 - 30.0 * 0.4, 1e-15);
-  EXPECT_NEAR(forcing[1][3], 1000.0 + 120.0 * 1.2 * 1.4 - 130.0 * 1.4, 1e-15);
-  EXPECT_NEAR(forcing[0][4], 1000.0 + 10.0 * 0.1 * 0.3 * 2.4, 1e-15);
-  EXPECT_NEAR(forcing[1][4], 1000.0 + 110.0 * 1.1 * 1.3 * 2.4, 1e-15);
+  EXPECT_DOUBLE_EQ(forcing[0][0], 1000.0 - 10.0 * 0.1 * 0.3 + 20.0 * 0.2);
+  EXPECT_DOUBLE_EQ(forcing[1][0], 1000.0 - 110.0 * 1.1 * 1.3 + 120.0 * 1.2);
+  EXPECT_DOUBLE_EQ(forcing[0][1], 1000.0 + 10.0 * 0.1 * 0.3 - 20.0 * 0.2);
+  EXPECT_DOUBLE_EQ(forcing[1][1], 1000.0 + 110.0 * 1.1 * 1.3 - 120.0 * 1.2);
+  EXPECT_DOUBLE_EQ(forcing[0][2], 1000.0 - 10.0 * 0.1 * 0.3);
+  EXPECT_DOUBLE_EQ(forcing[1][2], 1000.0 - 110.0 * 1.1 * 1.3);
+  EXPECT_DOUBLE_EQ(forcing[0][3], 1000.0 + 20.0 * 0.2 * 1.4 - 30.0 * 0.4);
+  EXPECT_DOUBLE_EQ(forcing[1][3], 1000.0 + 120.0 * 1.2 * 1.4 - 130.0 * 1.4);
+  EXPECT_DOUBLE_EQ(forcing[0][4], 1000.0 + 10.0 * 0.1 * 0.3 * 2.4);
+  EXPECT_DOUBLE_EQ(forcing[1][4], 1000.0 + 110.0 * 1.1 * 1.3 * 2.4);