From 23ea7942bf15f7cc46dd9332e7016437fc3efb2e Mon Sep 17 00:00:00 2001 From: Qina Tan Date: Fri, 23 Jun 2023 09:13:35 -0600 Subject: [PATCH] debugging in progress --- src/process/process_set.cu | 57 +++++++++++++++++--------------------- 1 file changed, 26 insertions(+), 31 deletions(-) diff --git a/src/process/process_set.cu b/src/process/process_set.cu index 54575b4b8..4d92f0019 100644 --- a/src/process/process_set.cu +++ b/src/process/process_set.cu @@ -23,33 +23,34 @@ namespace micm { size_t* yields_) { //define thread index - int tid = blockIdx.x + blockDim.x + threadIdx.x; + int tid = blockIdx.x * blockDim.x + threadIdx.x; int rate_constants_size = matrix_rows * rate_constants_columns; + if (tid < rate_constants_size){ - // int rate = rate_constants[tid]; // rate of a specific reaction in a specific gridcell - // int row_index = tid % rate_constants_columns; - // int reactant_num = number_of_reactants_[tid % rate_constants_columns]; //number of reactants of the reaction - // int product_num = number_of_products_[tid % rate_constants_columns]; //number of products of the reaction - // int initial_reactant_ids_index = accumulated_n_reactants[tid % rate_constants_columns]; - // int initial_product_ids_index = accumulated_n_products[tid % rate_constants_columns]; - // int initial_yields_index = accumulated_n_products[tid % rate_constants_columns]; + int rate = rate_constants[tid]; // rate of a specific reaction in a specific gridcell + int row_index = tid % rate_constants_columns; + int reactant_num = number_of_reactants_[tid % rate_constants_columns]; //number of reactants of the reaction + int product_num = number_of_products_[tid % rate_constants_columns]; //number of products of the reaction + int initial_reactant_ids_index = accumulated_n_reactants[tid % rate_constants_columns]; + int initial_product_ids_index = accumulated_n_products[tid % rate_constants_columns]; + int initial_yields_index = accumulated_n_products[tid % rate_constants_columns]; - // for (int i_reactant = 0; i_reactant < reactant_num; i_reactant++){ - // int reactant_ids_index = initial_reactant_ids_index + i_reactant; - // int state_forcing_col_index = reactant_ids_[reactant_ids_index]; - // //how to match thread idx to state_variable index - // //but we need to consider the row of state_variable - // rate *= state_variables[row_index * state_forcing_columns + state_forcing_col_index]; - // forcing[row_index * state_forcing_columns + state_forcing_col_index] -= rate; - // } - // for (int i_product = 0; i_product < product_num; i_product++){ - // int yields_index = initial_yields_index + i_product; - // int product_ids_index = initial_product_ids_index + i_product; - // int forcing_col_index = product_ids_[product_ids_index]; - // forcing[row_index * state_forcing_columns + forcing_col_index] += yields_[yields_index] * rate; - // } - forcing[tid] = tid; + for (int i_reactant = 0; i_reactant < reactant_num; i_reactant++){ + int reactant_ids_index = initial_reactant_ids_index + i_reactant; + int state_forcing_col_index = reactant_ids_[reactant_ids_index]; + //how to match thread idx to state_variable index + //but we need to consider the row of state_variable + rate *= state_variables[row_index * state_forcing_columns + state_forcing_col_index]; + forcing[row_index * state_forcing_columns + state_forcing_col_index] -= rate; + } + for (int i_product = 0; i_product < product_num; i_product++){ + int yields_index = initial_yields_index + i_product; + int product_ids_index = initial_product_ids_index + i_product; + int forcing_col_index = product_ids_[product_ids_index]; + forcing[row_index * state_forcing_columns + forcing_col_index] += yields_[yields_index] * rate; + } + } } void AddForcingTerms_kernelSetup( @@ -137,21 +138,15 @@ namespace micm { cudaMemcpy(d_yields_, yields, yields_bytes, cudaMemcpyHostToDevice); //total thread count == rate_constants matrix size - //int threads_count = matrix_rows * rate_constants_columns; - int threads_count = matrix_rows * state_forcing_columns; - //block size - int threadsPerBlock = 128; //32 threads per warp * 4 warps - //grid size - int blocks_count = (int)ceil(threads_count/threadsPerBlock); + int N = matrix_rows * rate_constants_columns; //kernel function call - AddForcingTerms_kernel<<>>(d_rate_constants, d_state_variables, + AddForcingTerms_kernel<<<(N+255)/256, 256>>>(d_rate_constants, d_state_variables, d_forcing, matrix_rows, rate_constants_columns, state_forcing_columns, d_number_of_reactants_, d_accumulated_n_reactants, d_reactant_ids_, d_number_of_products_, d_accumulated_n_products, d_product_ids_, d_yields_); cudaDeviceSynchronize(); - cudaMemcpy(forcing_data, d_forcing, state_forcing_bytes, cudaMemcpyDeviceToHost); cudaFree(d_rate_constants);