diff --git a/src/process/process_set.cu b/src/process/process_set.cu
index 6fbf36ba5..7093c50a9 100644
--- a/src/process/process_set.cu
+++ b/src/process/process_set.cu
@@ -9,25 +9,6 @@ namespace micm {
     //one thread per reaction
     //passing all device pointers 
 
-   
-__device__ double atomicSub(double* address, double val)
-{
-    unsigned long long int* address_as_ull =
-                              (unsigned long long int*)address;
-    unsigned long long int old = *address_as_ull, assumed;
-
-    do {
-        assumed = old;
-        old = atomicCAS(address_as_ull, assumed,
-                        __double_as_longlong(val -
-                               __longlong_as_double(assumed)));
-
-    // Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN)
-    } while (assumed != old);
-
-    return __longlong_as_double(old);
-}
-
 __device__ double atomicAdd(double* address, double val)
 {
     unsigned long long int* address_as_ull =
@@ -87,7 +68,8 @@ __device__ double atomicAdd(double* address, double val)
         for (int i_reactant = 0; i_reactant < reactant_num; i_reactant++){
             int reactant_ids_index = initial_reactant_ids_index + i_reactant; 
             int state_forcing_col_index = reactant_ids_[reactant_ids_index]; 
-            atomicSub(&forcing[row_index * state_forcing_columns + state_forcing_col_index], rate);
+            // subtract the reactant loss: atomically add the negated rate (CUDA has no atomicSub for double)
+            atomicAdd(&forcing[row_index * state_forcing_columns + state_forcing_col_index], -rate);
         }
 
         for (int i_product = 0; i_product < product_num; i_product++){
@@ -211,6 +193,7 @@ __device__ double atomicAdd(double* address, double val)
         
         cudaMemcpy(forcing_data, d_forcing, state_forcing_bytes, cudaMemcpyDeviceToHost);
         
+        // clean up: release the device buffers now that forcing has been copied back to the host
         cudaFree(d_rate_constants); 
         cudaFree(d_state_variables); 
         cudaFree(d_forcing);
diff --git a/test/unit/process/test_cuda_process_set.cpp b/test/unit/process/test_cuda_process_set.cpp
index 1d31d1298..30b567dd0 100644
--- a/test/unit/process/test_cuda_process_set.cpp
+++ b/test/unit/process/test_cuda_process_set.cpp
@@ -76,16 +76,16 @@ TEST(ProcessSet, Constructor)
     state.variables_, 
     forcing);
 
-  EXPECT_NEAR(forcing[0][0], 1000.0 - 10.0 * 0.1 * 0.3 + 20.0 * 0.2, 1e-15);
-  EXPECT_NEAR(forcing[1][0], 1000.0 - 110.0 * 1.1 * 1.3 + 120.0 * 1.2, 1e-15);
-  EXPECT_NEAR(forcing[0][1], 1000.0 + 10.0 * 0.1 * 0.3 - 20.0 * 0.2, 1e-15);
-  EXPECT_NEAR(forcing[1][1], 1000.0 + 110.0 * 1.1 * 1.3 - 120.0 * 1.2,1e-15);
-  EXPECT_NEAR(forcing[0][2], 1000.0 - 10.0 * 0.1 * 0.3, 1e-15);
-  EXPECT_NEAR(forcing[1][2], 1000.0 - 110.0 * 1.1 * 1.3, 1e-15);
-  EXPECT_NEAR(forcing[0][3], 1000.0 + 20.0 * 0.2 * 1.4 - 30.0 * 0.4, 1e-15);
-  EXPECT_NEAR(forcing[1][3], 1000.0 + 120.0 * 1.2 * 1.4 - 130.0 * 1.4, 1e-15);
-  EXPECT_NEAR(forcing[0][4], 1000.0 + 10.0 * 0.1 * 0.3 * 2.4, 1e-15);
-  EXPECT_NEAR(forcing[1][4], 1000.0 + 110.0 * 1.1 * 1.3 * 2.4, 1e-15);
+  EXPECT_DOUBLE_EQ(forcing[0][0], 1000.0 - 10.0 * 0.1 * 0.3 + 20.0 * 0.2);  // EXPECT_DOUBLE_EQ takes no tolerance argument (compares within 4 ULPs)
+  EXPECT_DOUBLE_EQ(forcing[1][0], 1000.0 - 110.0 * 1.1 * 1.3 + 120.0 * 1.2);
+  EXPECT_DOUBLE_EQ(forcing[0][1], 1000.0 + 10.0 * 0.1 * 0.3 - 20.0 * 0.2);
+  EXPECT_DOUBLE_EQ(forcing[1][1], 1000.0 + 110.0 * 1.1 * 1.3 - 120.0 * 1.2);
+  EXPECT_DOUBLE_EQ(forcing[0][2], 1000.0 - 10.0 * 0.1 * 0.3);
+  EXPECT_DOUBLE_EQ(forcing[1][2], 1000.0 - 110.0 * 1.1 * 1.3);
+  EXPECT_DOUBLE_EQ(forcing[0][3], 1000.0 + 20.0 * 0.2 * 1.4 - 30.0 * 0.4);
+  EXPECT_DOUBLE_EQ(forcing[1][3], 1000.0 + 120.0 * 1.2 * 1.4 - 130.0 * 1.4);
+  EXPECT_DOUBLE_EQ(forcing[0][4], 1000.0 + 10.0 * 0.1 * 0.3 * 2.4);
+  EXPECT_DOUBLE_EQ(forcing[1][4], 1000.0 + 110.0 * 1.1 * 1.3 * 2.4);