Skip to content

Commit

Permalink
debugging
Browse files Browse the repository at this point in the history
  • Loading branch information
qinatan committed Jun 22, 2023
1 parent 673b07f commit 74cc829
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 38 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,10 @@ make

#### On Gust
```
qinteractive -A<ACCOUNT_NUMBER> --ngpus=1
qinteractive -A NTDD0005 --ngpus=1
module load cmake/3.25.2 nvhpc/23.1 cuda/11.7.1
mkdir build && cd build
cmake -DENABLE_OPENACC=ON -DENABLE_CUDA=ON -DENABLE_REGESSION_TESTS=OFF -D GPU_TYPE="a100" ..
cmake -DENABLE_OPENACC=OFF -DENABLE_CUDA=ON -DENABLE_REGESSION_TESTS=OFF -D GPU_TYPE="a100" .
make
make test
```
1 change: 0 additions & 1 deletion include/micm/process/process_set.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ namespace micm
/// @brief Solver function calculators for a collection of processes
class ProcessSet
{

std::vector<std::size_t> number_of_reactants_;
std::vector<std::size_t> reactant_ids_;
std::vector<std::size_t> number_of_products_;
Expand Down
49 changes: 26 additions & 23 deletions src/process/process_set.cu
Original file line number Diff line number Diff line change
Expand Up @@ -26,29 +26,30 @@ namespace micm {
int tid = blockIdx.x + blockDim.x + threadIdx.x;
int rate_constants_size = matrix_rows * rate_constants_columns;
if (tid < rate_constants_size){
int rate = rate_constants[tid]; // rate of a specific reaction in a specific gridcell
int row_index = tid % rate_constants_columns;
int reactant_num = number_of_reactants_[tid % rate_constants_columns]; //number of reactants of the reaction
int product_num = number_of_products_[tid % rate_constants_columns]; //number of products of the reaction
int initial_reactant_ids_index = accumulated_n_reactants[tid % rate_constants_columns];
int initial_product_ids_index = accumulated_n_products[tid % rate_constants_columns];
int initial_yields_index = accumulated_n_products[tid % rate_constants_columns];
// int rate = rate_constants[tid]; // rate of a specific reaction in a specific gridcell
// int row_index = tid % rate_constants_columns;
// int reactant_num = number_of_reactants_[tid % rate_constants_columns]; //number of reactants of the reaction
// int product_num = number_of_products_[tid % rate_constants_columns]; //number of products of the reaction
// int initial_reactant_ids_index = accumulated_n_reactants[tid % rate_constants_columns];
// int initial_product_ids_index = accumulated_n_products[tid % rate_constants_columns];
// int initial_yields_index = accumulated_n_products[tid % rate_constants_columns];


for (int i_reactant = 0; i_reactant < reactant_num; i_reactant++){
int reactant_ids_index = initial_reactant_ids_index + i_reactant;
int state_forcing_col_index = reactant_ids_[reactant_ids_index];
//how to match thread idx to state_variable index
//but we need to consider the row of state_variable
rate *= state_variables[row_index * state_forcing_columns + state_forcing_col_index];
forcing[row_index * state_forcing_columns + state_forcing_col_index] -= rate;
}
for (int i_product = 0; i_product < product_num; i_product++){
int yields_index = initial_yields_index + i_product;
int product_ids_index = initial_product_ids_index + i_product;
int forcing_col_index = product_ids_[product_ids_index];
forcing[row_index * state_forcing_columns + forcing_col_index] += yields_[yields_index] * rate;
}
// for (int i_reactant = 0; i_reactant < reactant_num; i_reactant++){
// int reactant_ids_index = initial_reactant_ids_index + i_reactant;
// int state_forcing_col_index = reactant_ids_[reactant_ids_index];
// //how to match thread idx to state_variable index
// //but we need to consider the row of state_variable
// rate *= state_variables[row_index * state_forcing_columns + state_forcing_col_index];
// forcing[row_index * state_forcing_columns + state_forcing_col_index] -= rate;
// }
// for (int i_product = 0; i_product < product_num; i_product++){
// int yields_index = initial_yields_index + i_product;
// int product_ids_index = initial_product_ids_index + i_product;
// int forcing_col_index = product_ids_[product_ids_index];
// forcing[row_index * state_forcing_columns + forcing_col_index] += yields_[yields_index] * rate;
// }
forcing[tid] = tid;
}
}
void AddForcingTerms_kernelSetup(
Expand Down Expand Up @@ -136,7 +137,8 @@ namespace micm {
cudaMemcpy(d_yields_, yields, yields_bytes, cudaMemcpyHostToDevice);

//total thread count == rate_constants matrix size
int threads_count = matrix_rows * rate_constants_columns;
//int threads_count = matrix_rows * rate_constants_columns;
int threads_count = matrix_rows * state_forcing_columns;
//block size
int threadsPerBlock = 128; //32 threads per warp * 4 warps
//grid size
Expand All @@ -149,7 +151,8 @@ namespace micm {
d_number_of_products_, d_accumulated_n_products, d_product_ids_,
d_yields_);
cudaDeviceSynchronize();
cudaMemcpy(d_forcing, forcing_data, state_forcing_bytes, cudaMemcpyDeviceToHost);

cudaMemcpy(forcing_data, d_forcing, state_forcing_bytes, cudaMemcpyDeviceToHost);

cudaFree(d_rate_constants);
cudaFree(d_state_variables);
Expand Down
41 changes: 29 additions & 12 deletions test/unit/process/test_cuda_process_set.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,17 +37,6 @@ TEST(ProcessSet, Constructor)

micm::ProcessSet set{ std::vector<micm::Process>{ r1, r2, r3 }, state };

const size_t* number_of_reactants = set.number_of_reactants_vector().data();
int number_of_reactants_size = set.number_of_reactants_vector().size();
const size_t* reactant_ids = set.reactant_ids_vector().data();
int reactant_ids_size = set.reactant_ids_vector().size();
const size_t* number_of_products = set.number_of_products_vector().data();
int number_of_products_size = set.number_of_products_vector().size();
const size_t* product_ids = set.product_ids_vector().data();
int product_ids_size = set.product_ids_vector().size();
const double* yields = set.yields_vector().data();
int yields_size = set.yields_vector().size();

EXPECT_EQ(state.variables_.size(), 2);
EXPECT_EQ(state.variables_[0].size(), 5);
state.variables_[0] = { 0.1, 0.2, 0.3, 0.4, 0.5 };
Expand All @@ -58,7 +47,26 @@ TEST(ProcessSet, Constructor)
rate_constants[1] = { 110.0, 120.0, 130.0 };

micm::Matrix<double> forcing{ 2, 5, 1000.0 };


//debugging
std::cout<< "Before operation"<<std::endl;
double* forcing_data = forcing.AsVector().data();
int forcing_data_size = forcing.AsVector().size();
for (int j = 0; j < forcing_data_size; j++){
std::cout << forcing_data[j]<<std::endl;
}

const size_t* number_of_reactants = set.number_of_reactants_vector().data();
int number_of_reactants_size = set.number_of_reactants_vector().size();
const size_t* reactant_ids = set.reactant_ids_vector().data();
int reactant_ids_size = set.reactant_ids_vector().size();
const size_t* number_of_products = set.number_of_products_vector().data();
int number_of_products_size = set.number_of_products_vector().size();
const size_t* product_ids = set.product_ids_vector().data();
int product_ids_size = set.product_ids_vector().size();
const double* yields = set.yields_vector().data();
int yields_size = set.yields_vector().size();

micm::cuda::AddForcingTerms_kernelSetup(
number_of_reactants,
number_of_reactants_size,
Expand All @@ -84,6 +92,15 @@ TEST(ProcessSet, Constructor)
EXPECT_EQ(forcing[1][3], 1000.0 + 120.0 * 1.2 * 1.4 - 130.0 * 1.4);
EXPECT_EQ(forcing[0][4], 1000.0 + 10.0 * 0.1 * 0.3 * 2.4);
EXPECT_EQ(forcing[1][4], 1000.0 + 110.0 * 1.1 * 1.3 * 2.4);

//debugging
std::cout<< "After operation operation"<<std::endl;
double* forcing_data_after = forcing.AsVector().data();

for (int k = 0; k < forcing_data_size; k++){
std::cout << forcing_data_after[k]<<std::endl;
}


// auto non_zero_elements = set.NonZeroJacobianElements();

Expand Down

0 comments on commit 74cc829

Please sign in to comment.