testing atomic operation

NCAR · Jun 29, 2023 · a7ad4aa · a7ad4aa
1 parent 903a608
commit a7ad4aa
Show file tree

Hide file tree

Showing 3 changed files with 28 additions and 128 deletions.
diff --git a/include/micm/process/process_set.hpp b/include/micm/process/process_set.hpp
@@ -191,23 +191,16 @@ namespace micm
           rate *= cell_state[react_id[i_react]]; 
 
         for (std::size_t i_react = 0; i_react < number_of_reactants_[i_rxn]; ++i_react)
-          cell_forcing[react_id[i_react]] -= rate;
+          cell_forcing[react_id[i_react]] -= rate;   
 
-
-        for (std::size_t i_prod = 0; i_prod < number_of_products_[i_rxn]; ++i_prod){
-          std::cout << "this is cell_forcing data: "<< cell_forcing[prod_id[i_prod]]<<std::endl; 
-          std:: cout << "this is yield data: "<<  yield[i_prod] <<std::endl; 
+        for (std::size_t i_prod = 0; i_prod < number_of_products_[i_rxn]; ++i_prod)
           cell_forcing[prod_id[i_prod]] += yield[i_prod] * rate;
-        }
+
         react_id += number_of_reactants_[i_rxn];
         prod_id += number_of_products_[i_rxn];
         yield += number_of_products_[i_rxn];
       }
     }
-    double* forcing_data = forcing.AsVector().data(); 
-        for (int i = 0; i < forcing.AsVector().size(); i++){
-          std::cout << "this is cell forcing after update: "<< forcing_data[i]<<std::endl; 
-        }
   };
 
   inline void ProcessSet::AddJacobianTerms(

diff --git a/src/process/process_set.cu b/src/process/process_set.cu
@@ -8,13 +8,6 @@ namespace micm {
     //one thread per reaction
     //passing all device pointers 
     __global__ void AddForcingTerms_kernel(
-        double* rate_array,
-        double* rate_array_post,
-        double* state_variable, 
-        double* cell_forcing, 
-        double* cell_forcing_last,
-        size_t* yield,
-
         double* rate_constants, 
         double* state_variables, 
         double* forcing, 
@@ -27,7 +20,7 @@ namespace micm {
         size_t* number_of_products_, 
         size_t* accumulated_n_products, 
         size_t* product_ids_, 
-        size_t* yields_)
+        double* yields_)
     {
     //define thread index 
 
@@ -55,23 +48,18 @@ namespace micm {
         for (int i_reactant = 0; i_reactant < reactant_num; i_reactant++){
             int reactant_ids_index = initial_reactant_ids_index + i_reactant; 
             int state_forcing_col_index = reactant_ids_[reactant_ids_index]; 
-            forcing[row_index * state_forcing_columns + state_forcing_col_index] -=rate; 
-            //debugging 
-            cell_forcing[row_index * state_forcing_columns + state_forcing_col_index] = forcing[row_index * state_forcing_columns + state_forcing_col_index]; 
+            atomicSub_device(forcing[row_index * state_forcing_columns + state_forcing_col_index], rate);
         }
-        __syncthreads();
+
         for (int i_product = 0; i_product < product_num; i_product++){
             int yields_index = initial_yields_index + i_product; 
             int product_ids_index  = initial_product_ids_index + i_product; 
             int forcing_col_index = product_ids_[product_ids_index]; 
-            if (tid == 0){
-                cell_forcing_last[i_product] =  forcing[row_index * state_forcing_columns + forcing_col_index];
-                yield[i_product] = yields_[yields_index];
-            }
-            forcing[row_index * state_forcing_columns + forcing_col_index] += yields_[yields_index] * rate; 
-        } 
-    }
-  }
+            atomicAdd_device(forcing[row_index * state_forcing_columns + forcing_col_index], yields_[yields_index] * rate);
+        } //looping number of product times
+    } // checking valid tid value
+  } //AddForcingTerms_kernel function
+
     void AddForcingTerms_kernelSetup(
         const size_t* number_of_reactants,
         int number_of_reactants_size,
@@ -96,7 +84,7 @@ namespace micm {
         const double* state_variables_data = state_variables.AsVector().data();
         double* forcing_data = forcing.AsVector().data(); 
 
-       //this vector provides initial index to reactant_ids_ to get reactant id of every reaction 
+
         int accumulated_n_reactants_bytes = sizeof(size_t) * (number_of_reactants_size); 
         size_t* accumulated_n_reactants = (size_t*)malloc(accumulated_n_reactants_bytes); 
         accumulated_n_reactants[0] = 0; 
@@ -125,53 +113,17 @@ namespace micm {
         size_t* d_number_of_products_; 
         size_t* d_accumulated_n_products; 
         size_t* d_product_ids_; 
-        size_t* d_yields_; 
+        double* d_yields_; 
 
-        //debugging
-        int rate_array_size = matrix_rows * rate_constants_columns; 
-
-        double* d_rate_array;
-        double* rate_array; 
-        cudaMalloc(&d_rate_array, sizeof(double) * rate_array_size); 
-        rate_array = (double*)malloc(sizeof(double)* rate_array_size);
-
-        double* d_rate_array_post;
-        double* rate_array_post; 
-        cudaMalloc(&d_rate_array_post, sizeof(double) * rate_array_size);
-        rate_array_post = (double*)malloc(sizeof(double)*rate_array_size); 
-
-        double* d_state_variable; 
-        double* state_variable;
-        cudaMalloc(&d_state_variable, sizeof(double) *2); 
-        state_variable = (double*)malloc(sizeof(double) * 2);
-
-        double* d_cell_forcing; 
-        double* cell_forcing; 
-        cudaMalloc(&d_cell_forcing, sizeof(double) * 10); 
-        cell_forcing = (double*)malloc(sizeof(double) * 10); 
-        cudaMemcpy(d_cell_forcing, forcing_data, sizeof(double)* 10, cudaMemcpyHostToDevice); 
-
-        double* d_cell_forcing_last; 
-        double* cell_forcing_last; 
-        cudaMalloc(&d_cell_forcing_last, sizeof(double) * 2); 
-        cell_forcing_last = (double*)malloc(sizeof(double) * 2); 
-
-        size_t* d_yield; 
-        size_t* yield; 
-        cudaMalloc(&d_yield, sizeof(size_t) * 2); 
-        yield = (size_t*)malloc(sizeof(size_t) * 2); 
-
-
-
-
+
         //allocate device memory
         size_t rate_constants_bytes = sizeof(double) * (matrix_rows * rate_constants_columns); 
         size_t state_forcing_bytes = sizeof(double) * (matrix_rows * state_forcing_columns); 
         size_t number_of_reactants_bytes = sizeof(size_t) * number_of_reactants_size;
         size_t reactant_ids_bytes = sizeof(size_t) * reactant_ids_size; 
         size_t number_of_products_bytes = sizeof(size_t) * number_of_products_size; 
         size_t product_ids_bytes = sizeof(size_t) * product_ids_size;
-        size_t yields_bytes = sizeof(size_t) * yields_size;
+        size_t yields_bytes = sizeof(double) * yields_size;
 
         cudaMalloc(&d_rate_constants, rate_constants_bytes); 
         cudaMalloc(&d_state_variables, state_forcing_bytes); 
@@ -202,14 +154,7 @@ namespace micm {
         int num_block = (N + block_size -1)/block_size; 
 
         //kernel function call
-        AddForcingTerms_kernel<<<num_block, block_size>>>(
-            d_rate_array,
-            d_rate_array_post,
-            d_state_variable,
-            d_cell_forcing,
-            d_cell_forcing_last, 
-            d_yield,
-
+        AddForcingTerms_kernel<<<num_block, block_size>>>( 
             d_rate_constants, 
             d_state_variables, 
             d_forcing, 
@@ -227,36 +172,6 @@ namespace micm {
 
         cudaMemcpy(forcing_data, d_forcing, state_forcing_bytes, cudaMemcpyDeviceToHost);
 
-        //debugging 
-        cudaMemcpy(rate_array, d_rate_array, sizeof(double)*rate_array_size, cudaMemcpyDeviceToHost);    
-        std::cout << "this is rate_array before update: "<< std::endl; 
-        for (int k = 0; k < rate_array_size; k++){
-            std::cout << rate_array[k]<<std::endl; 
-        }
-        cudaMemcpy(state_variable, d_state_variable, sizeof(double)*2, cudaMemcpyDeviceToHost);   
-        std::cout << "This is state variable for first thread"<<std::endl;
-        for (int k = 0; k < 2; k++){
-            std::cout << state_variable[k]<<std::endl; 
-        }
-
-        cudaMemcpy(rate_array_post, d_rate_array_post, sizeof(double)*rate_array_size, cudaMemcpyDeviceToHost);    
-        std::cout << "this is rate_array after update: "<< std::endl; 
-        for (int k = 0; k < rate_array_size; k++){
-            std::cout << rate_array_post[k]<<std::endl; 
-        }
-
-       cudaMemcpy(cell_forcing, d_cell_forcing, sizeof(double)*10, cudaMemcpyDeviceToHost); 
-       for (int k = 0; k < 10; k++){
-        std::cout << "this is cell forcing after update"<< cell_forcing[k]<<std::endl; 
-       }
-
-       cudaMemcpy(cell_forcing_last, d_cell_forcing_last, sizeof(double)* 2, cudaMemcpyDeviceToHost); 
-       cudaMemcpy(yield, d_yield, sizeof(size_t) * 2, cudaMemcpyDeviceToHost); 
-       for (int i = 0; i < 2; i++){
-        std:: cout << "this is cell forcing data "<< cell_forcing_last[i]<< std::endl; 
-        std::cout << "this is yield" << yield[i]<<std::endl; 
-       }
-
         cudaFree(d_rate_constants); 
         cudaFree(d_state_variables); 
         cudaFree(d_forcing);
@@ -267,15 +182,10 @@ namespace micm {
         cudaFree(d_accumulated_n_products);
         cudaFree(d_product_ids_);
         cudaFree(d_yields_ );
-        cudaFree(d_rate_array); 
-        cudaFree(d_cell_forcing);
-        cudaFree(d_state_variable); 
         free(accumulated_n_reactants); 
         free(accumulated_n_products); 
-        free(rate_array_post); 
-        free(state_variable); 
-        free(cell_forcing);
+
 
-        }
+     } //kernel function setup
     }//namespace cuda 
 }//namespace micm
diff --git a/test/unit/process/test_cuda_process_set.cpp b/test/unit/process/test_cuda_process_set.cpp
@@ -60,9 +60,6 @@ TEST(ProcessSet, Constructor)
   const double* yields = set.yields_vector().data();
   int yields_size = set.yields_vector().size(); 
 
-  for (int i = 0; i < number_of_products_size; i++){
-    std::cout << "number of products: "<< number_of_products[i]<<std::endl; 
-  }
 
   micm::cuda::AddForcingTerms_kernelSetup(
     number_of_reactants,
@@ -79,16 +76,16 @@ TEST(ProcessSet, Constructor)
     state.variables_, 
     forcing);
 
-  EXPECT_EQ(forcing[0][0], 1000.0 - 10.0 * 0.1 * 0.3 + 20.0 * 0.2);
-  EXPECT_EQ(forcing[1][0], 1000.0 - 110.0 * 1.1 * 1.3 + 120.0 * 1.2);
-  EXPECT_EQ(forcing[0][1], 1000.0 + 10.0 * 0.1 * 0.3 - 20.0 * 0.2);
-  EXPECT_EQ(forcing[1][1], 1000.0 + 110.0 * 1.1 * 1.3 - 120.0 * 1.2);
-  EXPECT_EQ(forcing[0][2], 1000.0 - 10.0 * 0.1 * 0.3);
-  EXPECT_EQ(forcing[1][2], 1000.0 - 110.0 * 1.1 * 1.3);
-  EXPECT_EQ(forcing[0][3], 1000.0 + 20.0 * 0.2 * 1.4 - 30.0 * 0.4);
-  EXPECT_EQ(forcing[1][3], 1000.0 + 120.0 * 1.2 * 1.4 - 130.0 * 1.4);
-  EXPECT_EQ(forcing[0][4], 1000.0 + 10.0 * 0.1 * 0.3 * 2.4);
-  EXPECT_EQ(forcing[1][4], 1000.0 + 110.0 * 1.1 * 1.3 * 2.4);
+  EXPECT_NEAR(forcing[0][0], 1000.0 - 10.0 * 0.1 * 0.3 + 20.0 * 0.2, 1e-15);
+  EXPECT_NEAR(forcing[1][0], 1000.0 - 110.0 * 1.1 * 1.3 + 120.0 * 1.2, 1e-15);
+  EXPECT_NEAR(forcing[0][1], 1000.0 + 10.0 * 0.1 * 0.3 - 20.0 * 0.2, 1e-15);
+  EXPECT_NEAR(forcing[1][1], 1000.0 + 110.0 * 1.1 * 1.3 - 120.0 * 1.2,1e-15);
+  EXPECT_NEAR(forcing[0][2], 1000.0 - 10.0 * 0.1 * 0.3, 1e-15);
+  EXPECT_NEAR(forcing[1][2], 1000.0 - 110.0 * 1.1 * 1.3, 1e-15);
+  EXPECT_NEAR(forcing[0][3], 1000.0 + 20.0 * 0.2 * 1.4 - 30.0 * 0.4, 1e-15);
+  EXPECT_NEAR(forcing[1][3], 1000.0 + 120.0 * 1.2 * 1.4 - 130.0 * 1.4, 1e-15);
+  EXPECT_NEAR(forcing[0][4], 1000.0 + 10.0 * 0.1 * 0.3 * 2.4, 1e-15);
+  EXPECT_NEAR(forcing[1][4], 1000.0 + 110.0 * 1.1 * 1.3 * 2.4, 1e-15);