debugging

NCAR · Jun 22, 2023 · 74cc829 · 74cc829
1 parent 673b07f
commit 74cc829
Show file tree

Hide file tree

Showing 4 changed files with 57 additions and 38 deletions.
diff --git a/README.md b/README.md
@@ -33,10 +33,10 @@ make
 
 #### On Gust
 ```
-qinteractive -A<ACCOUNT_NUMBER> --ngpus=1
+qinteractive -A NTDD0005 --ngpus=1
 module load cmake/3.25.2 nvhpc/23.1 cuda/11.7.1
 mkdir build && cd build
-cmake -DENABLE_OPENACC=ON -DENABLE_CUDA=ON -DENABLE_REGESSION_TESTS=OFF -D GPU_TYPE="a100" ..
+cmake -DENABLE_OPENACC=OFF -DENABLE_CUDA=ON -DENABLE_REGESSION_TESTS=OFF -D GPU_TYPE="a100" .
 make
 make test
 ```
diff --git a/include/micm/process/process_set.hpp b/include/micm/process/process_set.hpp
@@ -16,7 +16,6 @@ namespace micm
   /// @brief Solver function calculators for a collection of processes
   class ProcessSet
   {
-
     std::vector<std::size_t> number_of_reactants_;
     std::vector<std::size_t> reactant_ids_;
     std::vector<std::size_t> number_of_products_;

diff --git a/src/process/process_set.cu b/src/process/process_set.cu
@@ -26,29 +26,30 @@ namespace micm {
     int tid = blockIdx.x + blockDim.x + threadIdx.x; 
     int rate_constants_size = matrix_rows * rate_constants_columns; 
     if (tid < rate_constants_size){
-        int rate = rate_constants[tid]; // rate of a specific reaction in a specific gridcell 
-        int row_index = tid % rate_constants_columns; 
-        int reactant_num = number_of_reactants_[tid % rate_constants_columns]; //number of reactants of the reaction
-        int product_num = number_of_products_[tid % rate_constants_columns]; //number of products of the reaction 
-        int initial_reactant_ids_index = accumulated_n_reactants[tid % rate_constants_columns];
-        int initial_product_ids_index = accumulated_n_products[tid % rate_constants_columns];
-        int initial_yields_index = accumulated_n_products[tid % rate_constants_columns]; 
+        // int rate = rate_constants[tid]; // rate of a specific reaction in a specific gridcell 
+        // int row_index = tid % rate_constants_columns; 
+        // int reactant_num = number_of_reactants_[tid % rate_constants_columns]; //number of reactants of the reaction
+        // int product_num = number_of_products_[tid % rate_constants_columns]; //number of products of the reaction 
+        // int initial_reactant_ids_index = accumulated_n_reactants[tid % rate_constants_columns];
+        // int initial_product_ids_index = accumulated_n_products[tid % rate_constants_columns];
+        // int initial_yields_index = accumulated_n_products[tid % rate_constants_columns]; 
 
 
-        for (int i_reactant = 0; i_reactant < reactant_num; i_reactant++){
-            int reactant_ids_index = initial_reactant_ids_index + i_reactant; 
-            int state_forcing_col_index = reactant_ids_[reactant_ids_index]; 
-            //how to match thread idx to state_variable index 
-            //but we need to consider the row of state_variable 
-            rate *= state_variables[row_index * state_forcing_columns + state_forcing_col_index]; 
-            forcing[row_index * state_forcing_columns + state_forcing_col_index] -= rate; 
-        }
-        for (int i_product = 0; i_product < product_num; i_product++){
-            int yields_index = initial_yields_index + i_product; 
-            int product_ids_index  = initial_product_ids_index + i_product; 
-            int forcing_col_index = product_ids_[product_ids_index]; 
-            forcing[row_index * state_forcing_columns + forcing_col_index] += yields_[yields_index] * rate; 
-        }   
+        // for (int i_reactant = 0; i_reactant < reactant_num; i_reactant++){
+        //     int reactant_ids_index = initial_reactant_ids_index + i_reactant; 
+        //     int state_forcing_col_index = reactant_ids_[reactant_ids_index]; 
+        //     //how to match thread idx to state_variable index 
+        //     //but we need to consider the row of state_variable 
+        //     rate *= state_variables[row_index * state_forcing_columns + state_forcing_col_index]; 
+        //     forcing[row_index * state_forcing_columns + state_forcing_col_index] -= rate; 
+        // }
+        // for (int i_product = 0; i_product < product_num; i_product++){
+        //     int yields_index = initial_yields_index + i_product; 
+        //     int product_ids_index  = initial_product_ids_index + i_product; 
+        //     int forcing_col_index = product_ids_[product_ids_index]; 
+        //     forcing[row_index * state_forcing_columns + forcing_col_index] += yields_[yields_index] * rate; 
+        // } 
+        forcing[tid] = tid;   
     }
   }
     void AddForcingTerms_kernelSetup(
@@ -136,7 +137,8 @@ namespace micm {
         cudaMemcpy(d_yields_, yields, yields_bytes, cudaMemcpyHostToDevice); 
 
         //total thread count == rate_constants matrix size
-        int threads_count = matrix_rows * rate_constants_columns; 
+        //int threads_count = matrix_rows * rate_constants_columns; 
+        int threads_count = matrix_rows * state_forcing_columns;
         //block size 
         int threadsPerBlock = 128; //32 threads per warp * 4 warps
         //grid size 
@@ -149,7 +151,8 @@ namespace micm {
         d_number_of_products_, d_accumulated_n_products, d_product_ids_, 
         d_yields_);
         cudaDeviceSynchronize(); 
-        cudaMemcpy(d_forcing, forcing_data, state_forcing_bytes, cudaMemcpyDeviceToHost);
+
+        cudaMemcpy(forcing_data, d_forcing, state_forcing_bytes, cudaMemcpyDeviceToHost);
 
         cudaFree(d_rate_constants); 
         cudaFree(d_state_variables); 

diff --git a/test/unit/process/test_cuda_process_set.cpp b/test/unit/process/test_cuda_process_set.cpp
@@ -37,17 +37,6 @@ TEST(ProcessSet, Constructor)
 
   micm::ProcessSet set{ std::vector<micm::Process>{ r1, r2, r3 }, state };
 
-  const size_t* number_of_reactants = set.number_of_reactants_vector().data();
-  int number_of_reactants_size = set.number_of_reactants_vector().size();
-  const size_t* reactant_ids = set.reactant_ids_vector().data();
-  int reactant_ids_size = set.reactant_ids_vector().size();
-  const size_t* number_of_products = set.number_of_products_vector().data();
-  int number_of_products_size = set.number_of_products_vector().size();
-  const size_t* product_ids = set.product_ids_vector().data(); 
-  int product_ids_size = set.product_ids_vector().size();
-  const double* yields = set.yields_vector().data();
-  int yields_size = set.yields_vector().size(); 
-
   EXPECT_EQ(state.variables_.size(), 2);
   EXPECT_EQ(state.variables_[0].size(), 5);
   state.variables_[0] = { 0.1, 0.2, 0.3, 0.4, 0.5 };
@@ -58,7 +47,26 @@ TEST(ProcessSet, Constructor)
   rate_constants[1] = { 110.0, 120.0, 130.0 };
 
   micm::Matrix<double> forcing{ 2, 5, 1000.0 };
-
+
+  //debugging 
+  std::cout<< "Before operation"<<std::endl; 
+  double* forcing_data = forcing.AsVector().data(); 
+  int forcing_data_size = forcing.AsVector().size();
+  for (int j = 0; j < forcing_data_size; j++){
+    std::cout << forcing_data[j]<<std::endl;
+  }
+
+  const size_t* number_of_reactants = set.number_of_reactants_vector().data();
+  int number_of_reactants_size = set.number_of_reactants_vector().size();
+  const size_t* reactant_ids = set.reactant_ids_vector().data();
+  int reactant_ids_size = set.reactant_ids_vector().size();
+  const size_t* number_of_products = set.number_of_products_vector().data();
+  int number_of_products_size = set.number_of_products_vector().size();
+  const size_t* product_ids = set.product_ids_vector().data(); 
+  int product_ids_size = set.product_ids_vector().size();
+  const double* yields = set.yields_vector().data();
+  int yields_size = set.yields_vector().size(); 
+
   micm::cuda::AddForcingTerms_kernelSetup(
     number_of_reactants,
     number_of_reactants_size,
@@ -84,6 +92,15 @@ TEST(ProcessSet, Constructor)
   EXPECT_EQ(forcing[1][3], 1000.0 + 120.0 * 1.2 * 1.4 - 130.0 * 1.4);
   EXPECT_EQ(forcing[0][4], 1000.0 + 10.0 * 0.1 * 0.3 * 2.4);
   EXPECT_EQ(forcing[1][4], 1000.0 + 110.0 * 1.1 * 1.3 * 2.4);
+
+  //debugging 
+  std::cout<< "After operation operation"<<std::endl; 
+  double* forcing_data_after = forcing.AsVector().data(); 
+
+  for (int k = 0; k < forcing_data_size; k++){
+    std::cout << forcing_data_after[k]<<std::endl;
+  }
+
 
 //   auto non_zero_elements = set.NonZeroJacobianElements();