Skip to content

Commit

Permalink
testing atomic operation
Browse files Browse the repository at this point in the history
  • Loading branch information
qinatan committed Jun 29, 2023
1 parent 903a608 commit a7ad4aa
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 128 deletions.
13 changes: 3 additions & 10 deletions include/micm/process/process_set.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -191,23 +191,16 @@ namespace micm
rate *= cell_state[react_id[i_react]];

for (std::size_t i_react = 0; i_react < number_of_reactants_[i_rxn]; ++i_react)
cell_forcing[react_id[i_react]] -= rate;
cell_forcing[react_id[i_react]] -= rate;


for (std::size_t i_prod = 0; i_prod < number_of_products_[i_rxn]; ++i_prod){
std::cout << "this is cell_forcing data: "<< cell_forcing[prod_id[i_prod]]<<std::endl;
std:: cout << "this is yield data: "<< yield[i_prod] <<std::endl;
for (std::size_t i_prod = 0; i_prod < number_of_products_[i_rxn]; ++i_prod)
cell_forcing[prod_id[i_prod]] += yield[i_prod] * rate;
}

react_id += number_of_reactants_[i_rxn];
prod_id += number_of_products_[i_rxn];
yield += number_of_products_[i_rxn];
}
}
double* forcing_data = forcing.AsVector().data();
for (int i = 0; i < forcing.AsVector().size(); i++){
std::cout << "this is cell forcing after update: "<< forcing_data[i]<<std::endl;
}
};

inline void ProcessSet::AddJacobianTerms(
Expand Down
120 changes: 15 additions & 105 deletions src/process/process_set.cu
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,6 @@ namespace micm {
//one thread per reaction
//passing all device pointers
__global__ void AddForcingTerms_kernel(
double* rate_array,
double* rate_array_post,
double* state_variable,
double* cell_forcing,
double* cell_forcing_last,
size_t* yield,

double* rate_constants,
double* state_variables,
double* forcing,
Expand All @@ -27,7 +20,7 @@ namespace micm {
size_t* number_of_products_,
size_t* accumulated_n_products,
size_t* product_ids_,
size_t* yields_)
double* yields_)
{
//define thread index

Expand Down Expand Up @@ -55,23 +48,18 @@ namespace micm {
for (int i_reactant = 0; i_reactant < reactant_num; i_reactant++){
int reactant_ids_index = initial_reactant_ids_index + i_reactant;
int state_forcing_col_index = reactant_ids_[reactant_ids_index];
forcing[row_index * state_forcing_columns + state_forcing_col_index] -=rate;
//debugging
cell_forcing[row_index * state_forcing_columns + state_forcing_col_index] = forcing[row_index * state_forcing_columns + state_forcing_col_index];
atomicSub_device(forcing[row_index * state_forcing_columns + state_forcing_col_index], rate);
}
__syncthreads();

for (int i_product = 0; i_product < product_num; i_product++){
int yields_index = initial_yields_index + i_product;
int product_ids_index = initial_product_ids_index + i_product;
int forcing_col_index = product_ids_[product_ids_index];
if (tid == 0){
cell_forcing_last[i_product] = forcing[row_index * state_forcing_columns + forcing_col_index];
yield[i_product] = yields_[yields_index];
}
forcing[row_index * state_forcing_columns + forcing_col_index] += yields_[yields_index] * rate;
}
}
}
atomicAdd_device(forcing[row_index * state_forcing_columns + forcing_col_index], yields_[yields_index] * rate);
} //looping number of product times
} // checking valid tid value
} //AddForcingTerms_kernel function

void AddForcingTerms_kernelSetup(
const size_t* number_of_reactants,
int number_of_reactants_size,
Expand All @@ -96,7 +84,7 @@ namespace micm {
const double* state_variables_data = state_variables.AsVector().data();
double* forcing_data = forcing.AsVector().data();

//this vector provides initial index to reactant_ids_ to get reactant id of every reaction

int accumulated_n_reactants_bytes = sizeof(size_t) * (number_of_reactants_size);
size_t* accumulated_n_reactants = (size_t*)malloc(accumulated_n_reactants_bytes);
accumulated_n_reactants[0] = 0;
Expand Down Expand Up @@ -125,53 +113,17 @@ namespace micm {
size_t* d_number_of_products_;
size_t* d_accumulated_n_products;
size_t* d_product_ids_;
size_t* d_yields_;
double* d_yields_;

//debugging
int rate_array_size = matrix_rows * rate_constants_columns;

double* d_rate_array;
double* rate_array;
cudaMalloc(&d_rate_array, sizeof(double) * rate_array_size);
rate_array = (double*)malloc(sizeof(double)* rate_array_size);

double* d_rate_array_post;
double* rate_array_post;
cudaMalloc(&d_rate_array_post, sizeof(double) * rate_array_size);
rate_array_post = (double*)malloc(sizeof(double)*rate_array_size);

double* d_state_variable;
double* state_variable;
cudaMalloc(&d_state_variable, sizeof(double) *2);
state_variable = (double*)malloc(sizeof(double) * 2);

double* d_cell_forcing;
double* cell_forcing;
cudaMalloc(&d_cell_forcing, sizeof(double) * 10);
cell_forcing = (double*)malloc(sizeof(double) * 10);
cudaMemcpy(d_cell_forcing, forcing_data, sizeof(double)* 10, cudaMemcpyHostToDevice);

double* d_cell_forcing_last;
double* cell_forcing_last;
cudaMalloc(&d_cell_forcing_last, sizeof(double) * 2);
cell_forcing_last = (double*)malloc(sizeof(double) * 2);

size_t* d_yield;
size_t* yield;
cudaMalloc(&d_yield, sizeof(size_t) * 2);
yield = (size_t*)malloc(sizeof(size_t) * 2);





//allocate device memory
size_t rate_constants_bytes = sizeof(double) * (matrix_rows * rate_constants_columns);
size_t state_forcing_bytes = sizeof(double) * (matrix_rows * state_forcing_columns);
size_t number_of_reactants_bytes = sizeof(size_t) * number_of_reactants_size;
size_t reactant_ids_bytes = sizeof(size_t) * reactant_ids_size;
size_t number_of_products_bytes = sizeof(size_t) * number_of_products_size;
size_t product_ids_bytes = sizeof(size_t) * product_ids_size;
size_t yields_bytes = sizeof(size_t) * yields_size;
size_t yields_bytes = sizeof(double) * yields_size;

cudaMalloc(&d_rate_constants, rate_constants_bytes);
cudaMalloc(&d_state_variables, state_forcing_bytes);
Expand Down Expand Up @@ -202,14 +154,7 @@ namespace micm {
int num_block = (N + block_size -1)/block_size;

//kernel function call
AddForcingTerms_kernel<<<num_block, block_size>>>(
d_rate_array,
d_rate_array_post,
d_state_variable,
d_cell_forcing,
d_cell_forcing_last,
d_yield,

AddForcingTerms_kernel<<<num_block, block_size>>>(
d_rate_constants,
d_state_variables,
d_forcing,
Expand All @@ -227,36 +172,6 @@ namespace micm {

cudaMemcpy(forcing_data, d_forcing, state_forcing_bytes, cudaMemcpyDeviceToHost);

//debugging
cudaMemcpy(rate_array, d_rate_array, sizeof(double)*rate_array_size, cudaMemcpyDeviceToHost);
std::cout << "this is rate_array before update: "<< std::endl;
for (int k = 0; k < rate_array_size; k++){
std::cout << rate_array[k]<<std::endl;
}
cudaMemcpy(state_variable, d_state_variable, sizeof(double)*2, cudaMemcpyDeviceToHost);
std::cout << "This is state variable for first thread"<<std::endl;
for (int k = 0; k < 2; k++){
std::cout << state_variable[k]<<std::endl;
}

cudaMemcpy(rate_array_post, d_rate_array_post, sizeof(double)*rate_array_size, cudaMemcpyDeviceToHost);
std::cout << "this is rate_array after update: "<< std::endl;
for (int k = 0; k < rate_array_size; k++){
std::cout << rate_array_post[k]<<std::endl;
}

cudaMemcpy(cell_forcing, d_cell_forcing, sizeof(double)*10, cudaMemcpyDeviceToHost);
for (int k = 0; k < 10; k++){
std::cout << "this is cell forcing after update"<< cell_forcing[k]<<std::endl;
}

cudaMemcpy(cell_forcing_last, d_cell_forcing_last, sizeof(double)* 2, cudaMemcpyDeviceToHost);
cudaMemcpy(yield, d_yield, sizeof(size_t) * 2, cudaMemcpyDeviceToHost);
for (int i = 0; i < 2; i++){
std:: cout << "this is cell forcing data "<< cell_forcing_last[i]<< std::endl;
std::cout << "this is yield" << yield[i]<<std::endl;
}

cudaFree(d_rate_constants);
cudaFree(d_state_variables);
cudaFree(d_forcing);
Expand All @@ -267,15 +182,10 @@ namespace micm {
cudaFree(d_accumulated_n_products);
cudaFree(d_product_ids_);
cudaFree(d_yields_ );
cudaFree(d_rate_array);
cudaFree(d_cell_forcing);
cudaFree(d_state_variable);
free(accumulated_n_reactants);
free(accumulated_n_products);
free(rate_array_post);
free(state_variable);
free(cell_forcing);


}
} //kernel function setup
}//namespace cuda
}//namespace micm
23 changes: 10 additions & 13 deletions test/unit/process/test_cuda_process_set.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,6 @@ TEST(ProcessSet, Constructor)
const double* yields = set.yields_vector().data();
int yields_size = set.yields_vector().size();

for (int i = 0; i < number_of_products_size; i++){
std::cout << "number of products: "<< number_of_products[i]<<std::endl;
}

micm::cuda::AddForcingTerms_kernelSetup(
number_of_reactants,
Expand All @@ -79,16 +76,16 @@ TEST(ProcessSet, Constructor)
state.variables_,
forcing);

EXPECT_EQ(forcing[0][0], 1000.0 - 10.0 * 0.1 * 0.3 + 20.0 * 0.2);
EXPECT_EQ(forcing[1][0], 1000.0 - 110.0 * 1.1 * 1.3 + 120.0 * 1.2);
EXPECT_EQ(forcing[0][1], 1000.0 + 10.0 * 0.1 * 0.3 - 20.0 * 0.2);
EXPECT_EQ(forcing[1][1], 1000.0 + 110.0 * 1.1 * 1.3 - 120.0 * 1.2);
EXPECT_EQ(forcing[0][2], 1000.0 - 10.0 * 0.1 * 0.3);
EXPECT_EQ(forcing[1][2], 1000.0 - 110.0 * 1.1 * 1.3);
EXPECT_EQ(forcing[0][3], 1000.0 + 20.0 * 0.2 * 1.4 - 30.0 * 0.4);
EXPECT_EQ(forcing[1][3], 1000.0 + 120.0 * 1.2 * 1.4 - 130.0 * 1.4);
EXPECT_EQ(forcing[0][4], 1000.0 + 10.0 * 0.1 * 0.3 * 2.4);
EXPECT_EQ(forcing[1][4], 1000.0 + 110.0 * 1.1 * 1.3 * 2.4);
EXPECT_NEAR(forcing[0][0], 1000.0 - 10.0 * 0.1 * 0.3 + 20.0 * 0.2, 1e-15);
EXPECT_NEAR(forcing[1][0], 1000.0 - 110.0 * 1.1 * 1.3 + 120.0 * 1.2, 1e-15);
EXPECT_NEAR(forcing[0][1], 1000.0 + 10.0 * 0.1 * 0.3 - 20.0 * 0.2, 1e-15);
EXPECT_NEAR(forcing[1][1], 1000.0 + 110.0 * 1.1 * 1.3 - 120.0 * 1.2,1e-15);
EXPECT_NEAR(forcing[0][2], 1000.0 - 10.0 * 0.1 * 0.3, 1e-15);
EXPECT_NEAR(forcing[1][2], 1000.0 - 110.0 * 1.1 * 1.3, 1e-15);
EXPECT_NEAR(forcing[0][3], 1000.0 + 20.0 * 0.2 * 1.4 - 30.0 * 0.4, 1e-15);
EXPECT_NEAR(forcing[1][3], 1000.0 + 120.0 * 1.2 * 1.4 - 130.0 * 1.4, 1e-15);
EXPECT_NEAR(forcing[0][4], 1000.0 + 10.0 * 0.1 * 0.3 * 2.4, 1e-15);
EXPECT_NEAR(forcing[1][4], 1000.0 + 110.0 * 1.1 * 1.3 * 2.4, 1e-15);



Expand Down

0 comments on commit a7ad4aa

Please sign in to comment.