Skip to content

Commit

Permalink
work in progress - 8/31
Browse files Browse the repository at this point in the history
  • Loading branch information
qinatan committed Sep 1, 2023
1 parent e09a00a commit d9187b1
Show file tree
Hide file tree
Showing 4 changed files with 116 additions and 66 deletions.
60 changes: 42 additions & 18 deletions include/micm/solver/cuda_lu_decomposition.hpp
Original file line number Diff line number Diff line change
@@ -1,28 +1,28 @@
#pragma once
#include<micm/solver/lu_decomposition.hpp>
#include<micm/util/cuda_param.hpp>
#include<thrust/device_vector.h>
#ifdef USE_CUDA
#include <micm/solver/cuda_de_composition.cuh>
#endif

#ifdef USE_CUDA
namespace micm{
class CUDALuDecomposition: public LuDecomposition{
public:
CUDALuDecomposition();
CUDALuDecomposition();
/// @brief Construct an LU decomposition algorithm for a given sparse matrix
/// @param matrix Sparse matrix
template<typename T, typename OrderingPolicy>
CUDALuDecomposition(const SparseMatrix<T, OrderingPolicy>& matrix);

#ifdef USE_CUDA
template<typename T, template<class> typename SparseMatrixPolicy>
requires VectorizableSparse<SparseMatrixPolicy<T>>
void Decompose(
const SparseMatrixPolicy<T>&A,
SparseMatrixPolicy<T>& L,
SparseMatrixPolicy<T>& U) const;
#endif
CUDALuDecomposition(const SparseMatrix<T, OrderingPolicy>& matrix);

template<typename T, template<class> typename SparseMatrixPolicy>
requires VectorizableSparse<SparseMatrixPolicy<T>>
void Decompose(
const SparseMatrixPolicy<T>&A,
SparseMatrixPolicy<T>& L,
SparseMatrixPolicy<T>& U) const;

inline CUDALuDecomposition::CUDALuDecomposition(){};

template<typename T, typename OrderingPolicy>
Expand Down Expand Up @@ -105,16 +105,40 @@ namespace micm{
SparseMatrixPolicy<T>& L,
SparseMatrixPolicy<T>& U) const
{
thrust::device_vector d_niLU<std::pair<size_t,size_t>> = niLU_;
thrust::device_vector d_uik_nkj<std::pair<size_t,size_t>> = uik_nkj_;
thrust::device_vector d_lij_ujk<std::pair<size_t, size_t>> = lij_ujk_;
thrust::device_vector d_lki_nkj<std::pair<size_t, size_t>> = lki_nkj_;
thrust::device_vector d_lkj_uji<std::pair<size_t, size_t>> = lkj_uji_;
CUDAMatrixParam matrix;
matrix.A = A.AsVector().data();
matrix.A_size = A.AsVector().size();
matrix.L = L.AsVector().data();
matrix.L_size = L.AsVector().size();
matrix.U = U.AsVector().data();
matrix.U_size = U.AsVector().size();

CUDASolverParam solver;
solver.d_niLU.resize(niLU_.size());
solver.d_uik_nkj.resize(uik_nkj_.size());
solver.d_lij_ujk.resize(lij_ujk_.size());
solver.d_lki_nkj.resize(lki_nkj_.size());
solver.d_lkj_uji.resize(lkj_uji_.size());
solver.d_niLU = niLU_;
solver.d_uik_nkj = uik_nkj_;
solver.d_lij_ujk = lij_ujk_;
solver.d_lki_nkj = lki_nkj_;
solver.d_lkj_uji = lkj_uji_;
solver.do_aik = do_aik_.data();
solver.do_aik_size = do_aik.size();
solver.aik = aik_.data();
solver.aik_size = aik_.size();
solver.do_aki = do_aki_.data();
solver.do_aki_size = do_aki_.size();
solver.aki = aki_.data();
solver.aki_size = aki_.size();
solver.uii = uii.data();

//calling kernelSetup function

DecomposeKernelDriver(matrix, solver);
}


} //end class CUDALuDecomposition
}//end micm
}//end micm
#endif
28 changes: 27 additions & 1 deletion include/micm/util/cuda_param.hpp
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#include<thrust/device_vector.h>
#ifndef CUDA_PARAM_HPP
#define CUDA_PARAM_HPP

Expand All @@ -14,15 +15,40 @@
const size_t* jacobian_flat_ids;
size_t jacobian_flat_ids_size;
};
//different matrix data grouped in struct passing to kernel driver function

struct CUDASolverParam{
thrust::device_vector d_niLU<thrust::pair<size_t,size_t>>;
thrust::device_vector d_uik_nkj<thrust::pair<size_t,size_t>>;
thrust::device_vector d_lij_ujk<thrust::pair<size_t, size_t>>;
thrust::device_vector d_lki_nkj<thrust::pair<size_t, size_t>>;
thrust::device_vector d_lkj_uji<thrust::pair<size_t, size_t>>;
const bool* do_aik;
size_t do_aik_size;
const size_t* aik;
size_t aik_size;
const bool* do_aki;
size_t do_aki_size;
const size_t* aki;
size_t aki_size;
const size_t* uii;
size_t uii_size;
}
//different matrix data grouped in struct passing to kernel driver function
struct CUDAMatrixParam{
const double* rate_constants_;
const double* state_variables_;
double* forcing_;
double* jacobian_;
const double* A;
double* L;
double* U;

size_t n_grids_;
size_t n_reactions_;
size_t n_species_;
size_t jacobian_size_;
size_t A_size;
size_t L_size;
size_t U_size;
};
#endif
1 change: 1 addition & 0 deletions src/process/process_set.cu
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ typedef struct jacobianDevice{
double* yields;
size_t* jacobian_flat_ids;
};

const size_t BLOCK_SIZE = 320;

namespace micm
Expand Down
93 changes: 46 additions & 47 deletions src/solver/lu_decomposition.cu
Original file line number Diff line number Diff line change
@@ -1,41 +1,29 @@
#include <iostream>
struct pair{
size_t first;
size_t second;
}
#include <micm/util/cuda_param>
const BLOCK_SIZE = 320;

struct decomposeDevice{
double* A;
double* L;
double* U;
bool* do_aik;
size_t* aik;
bool* do_aki;
size_t* aki;
size_t* uii;
};



namespace micm{
namespace cuda{
__global__ void Decompose_kernel(){

}

void decompose_kernelSetup(
const double* A,
size_t A_size,
double* L,
size_t L_size,
double* U,
size_t U_size,
size_t* niLu, //pair
size_t* niLu_size,
bool* do_aik,
size_t do_aik_size
size_t* aik,
size_t aik_size,
size_t* uik_nkj, //pair
size_t uik_nki_size,
size_t* lij_ujk, //pair
size_t lij_ujk_size,
bool* do_aki,
size_t do_aki_size,
size_t* aki,
size_t aki_size,
size_t* lki_nkj, //pair
size_t lki_nkj_size,
size_t* lkj_uji, //pair
size_t lki_uji_size,
size_t* uii,
size_t uii_size){
void DecomposeKernelDriver(
CUDAMatrixParam& matrix,
CUDASolverParam& solver){

//create device pointers and allocate device memory
double* d_A;
Expand All @@ -46,26 +34,37 @@ namespace micm{
bool* d_do_aki;
size_t* d_aki;
size_t* d_uii;
decomposeDevice* device;

cudaMalloc(&d_A, A, sizeof(double)* A_size);
cudaMalloc(&d_L, L, sizeof(double)* L_size);
cudaMalloc(&d_U, U, sizeof(double)* U_size);
cudaMalloc(&d_do_aik, sizeof(bool)* do_aik_size);
cudaMalloc(&d_aik, sizeof(size_t)* aik_size);
cudaMalloc(&d_do_aki, sizeof(bool)* do_aki_size);
cudaMalloc(&d_aki, sizeof(size_t)* aki_size);
cudaMalloc(d_uii, sizeof(size_t)* uii_size);
cudaMalloc(&d_A, sizeof(double)* matrix.A_size);
cudaMalloc(&d_L, sizeof(double)* matrix.L_size);
cudaMalloc(&d_U, sizeof(double)* matrix.U_size);
cudaMalloc(&d_do_aik, sizeof(bool)* solver.do_aik_size);
cudaMalloc(&d_aik, sizeof(size_t)* solver.aik_size);
cudaMalloc(&d_do_aki, sizeof(bool)* solver.do_aki_size);
cudaMalloc(&d_aki, sizeof(size_t)* solver.aki_size);
cudaMalloc(&d_uii, sizeof(size_t)* solver.uii_size);
cudaMalloc(&device, sizeof(decomposeDevice));

//transfer data from host to device
cudaMemcpy(d_A, A, sizeof(double)* A_size, cudaMemcpyHostToDevice);
cudaMemcpy(d_L, L, sizeof(double)* L_size, cudaMemcpyHostToDevice);
cudaMemcpy(d_U, U, sizeof(double)* U_size, cudaMemcpyHostToDevice);
cudaMemcpy(d_do_aik, do_aik, sizeof(bool)* do_aik_size, cudaMemcpyHostToDevice);
cudaMemcpy(d_aik, aik, sizeof(size_t)* aik_size, cudaMemcpyHostToDevice);
cudaMemcpy(d_do_aki, do_aki, sizeof(bool)* do_aki_size, cudaMemcpyHostToDevice);

cudaMemcpy(d_A, matrix.A, sizeof(double)* matrix.A_size, cudaMemcpyHostToDevice);
cudaMemcpy(d_L, matrix.L, sizeof(double)* matrix.L_size, cudaMemcpyHostToDevice);
cudaMemcpy(d_U, matrix.U, sizeof(double)* matrix.U_size, cudaMemcpyHostToDevice);
cudaMemcpy(d_do_aik, solver.do_aik, sizeof(bool)* solver.do_aik_size, cudaMemcpyHostToDevice);
cudaMemcpy(d_aik, solver.aik, sizeof(size_t)* solver.aik_size, cudaMemcpyHostToDevice);
cudaMemcpy(d_do_aki, solver.do_aki, sizeof(bool)* solver.do_aki_size, cudaMemcpyHostToDevice);
cudaMemcpy(d_uii, solver.uii, sizeof(size_t)* solver.uii_size, cudaMemcpyHostToDevice);
cudaMemcpy(&(device->A),&d_A, sizeof(double*), cudaMemcpyHostToDevice);
cudaMemcpy(&(device->L),&d_L, sizeof(double*), cudaMemcpyHostToDevice);
cudaMemcpy(&(device->U),&d_U, sizeof(double*), cudaMemcpyHostToDevice);
cudaMemcpy(&(device->do_aik), &d_do_aik, sizeof(bool*), cudaMemcpyHostToDevice);
cudaMemcpy(&(device->aik), &d_aik, sizeof(size_t*), cudaMemcpyHostToDevice);
cudaMemcpy(&(device->do_aki),&d_do_aki,sizeof(bool*),cudaMemcpyHostToDevice);
cudaMemcpy(&(device->aki),&d_aki, sizeof(size_t*), cudaMemcpyHostToDevice);
cudaMemcpy(&(device->uii), &d_uii, sizeof(size_t*), cudaMemcpyHostToDevice);





}

Expand Down

0 comments on commit d9187b1

Please sign in to comment.