From 9779db7c6f3b8bc2ebf509ac26af7cefd76344bf Mon Sep 17 00:00:00 2001 From: Qina Tan Date: Sat, 2 Sep 2023 11:11:26 -0600 Subject: [PATCH] compiling test --- include/micm/solver/cuda_lu_decomposition.hpp | 21 ++++++++------ include/micm/solver/lu_decomposition.hpp | 1 - include/micm/util/cuda_param.hpp | 17 +++++++---- src/solver/lu_decomposition.cu | 28 +++++++++++-------- 4 files changed, 40 insertions(+), 27 deletions(-) diff --git a/include/micm/solver/cuda_lu_decomposition.hpp b/include/micm/solver/cuda_lu_decomposition.hpp index efa13b567..8ecae4d16 100644 --- a/include/micm/solver/cuda_lu_decomposition.hpp +++ b/include/micm/solver/cuda_lu_decomposition.hpp @@ -2,6 +2,7 @@ #include #include #include +#include #ifdef USE_CUDA #include #endif @@ -105,13 +106,13 @@ namespace micm{ SparseMatrixPolicy& L, SparseMatrixPolicy& U) const { - CUDAMatrixParam matrix; - matrix.A = A.AsVector().data(); - matrix.A_size = A.AsVector().size(); - matrix.L = L.AsVector().data(); - matrix.L_size = L.AsVector().size(); - matrix.U = U.AsVector().data(); - matrix.U_size = U.AsVector().size(); + CUDASparseMatrix sparseMatrix; + sparseMatrix.A = A.AsVector().data(); + sparseMatrix.A_size = A.AsVector().size(); + sparseMatrix.L = L.AsVector().data(); + sparseMatrix.L_size = L.AsVector().size(); + sparseMatrix.U = U.AsVector().data(); + sparseMatrix.U_size = U.AsVector().size(); CUDASolverParam solver; solver.d_niLU.resize(niLU_.size()); @@ -132,10 +133,12 @@ namespace micm{ solver.do_aki_size = do_aki_.size(); solver.aki = aki_.data(); solver.aki_size = aki_.size(); - solver.uii = uii.data(); + solver.uii = uii_.data(); + solver.uii_size = uii_.size(); + //calling kernelSetup function - DecomposeKernelDriver(matrix, solver); + DecomposeKernelDriver(sparseMatrix, solver); } diff --git a/include/micm/solver/lu_decomposition.hpp b/include/micm/solver/lu_decomposition.hpp index e44baf678..2aa2b1b39 100644 --- a/include/micm/solver/lu_decomposition.hpp +++ b/include/micm/solver/lu_decomposition.hpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 #pragma once - #include namespace micm diff --git a/include/micm/util/cuda_param.hpp b/include/micm/util/cuda_param.hpp index 56a218c89..8423d7204 100644 --- a/include/micm/util/cuda_param.hpp +++ b/include/micm/util/cuda_param.hpp @@ -1,4 +1,5 @@ #include +#include #ifndef CUDA_PARAM_HPP #define CUDA_PARAM_HPP //member data of class CUDAProcessSet grouped in struct passing to kernel driver function @@ -16,11 +17,11 @@ }; struct CUDASolverParam{ - thrust::device_vector d_niLU>; - thrust::device_vector d_uik_nkj>; - thrust::device_vector d_lij_ujk>; - thrust::device_vector d_lki_nkj>; - thrust::device_vector d_lkj_uji>; + thrust::device_vector> d_niLU; + thrust::device_vector> d_uik_nkj; + thrust::device_vector> d_lij_ujk; + thrust::device_vector> d_lki_nkj; + thrust::device_vector> d_lkj_uji; const bool* do_aik; size_t do_aik_size; const size_t* aik; @@ -45,5 +46,11 @@ struct CUDASparseMatrixParam{ double* jacobian; size_t jacobian_size; + const double* A; + size_t A_size; + double* L; + size_t L_size; + double* U; + size_t U_size; }; #endif \ No newline at end of file diff --git a/src/solver/lu_decomposition.cu b/src/solver/lu_decomposition.cu index c18d1534d..f072e98e2 100644 --- a/src/solver/lu_decomposition.cu +++ b/src/solver/lu_decomposition.cu @@ -1,7 +1,8 @@ #include #include +#include +#include const BLOCK_SIZE = 320; - struct decomposeDevice{ double* A; double* L; @@ -17,12 +18,16 @@ struct decomposeDevice{ namespace micm{ namespace cuda{ - __global__ void Decompose_kernel(){ + // __global__ void Decompose_kernel( + // decomposeDevice& device, + // thrust::device_vector d_niLU>; + // ) + // { - } + // } void DecomposeKernelDriver( - CUDAMatrixParam& matrix, + CUDAMatrixParam& sparseMatrix, CUDASolverParam& solver){ //create device pointers and allocate device memory @@ -36,9 +41,9 @@ namespace micm{ size_t* d_uii; decomposeDevice* device; - cudaMalloc(&d_A, sizeof(double)* matrix.A_size); - cudaMalloc(&d_L, sizeof(double)* matrix.L_size); - cudaMalloc(&d_U, sizeof(double)* matrix.U_size); + cudaMalloc(&d_A, sizeof(double)* sparseMatrix.A_size); + cudaMalloc(&d_L, sizeof(double)* sparseMatrix.L_size); + cudaMalloc(&d_U, sizeof(double)* sparseMatrix.U_size); cudaMalloc(&d_do_aik, sizeof(bool)* solver.do_aik_size); cudaMalloc(&d_aik, sizeof(size_t)* solver.aik_size); cudaMalloc(&d_do_aki, sizeof(bool)* solver.do_aki_size); @@ -47,9 +52,9 @@ namespace micm{ cudaMalloc(&device, sizeof(decomposeDevice)); //transfer data from host to device - cudaMemcpy(d_A, matrix.A, sizeof(double)* matrix.A_size, cudaMemcpyHostToDevice); - cudaMemcpy(d_L, matrix.L, sizeof(double)* matrix.L_size, cudaMemcpyHostToDevice); - cudaMemcpy(d_U, matrix.U, sizeof(double)* matrix.U_size, cudaMemcpyHostToDevice); + cudaMemcpy(d_A, sparseMatrix.A, sizeof(double)* sparseMatrix.A_size, cudaMemcpyHostToDevice); + cudaMemcpy(d_L, sparseMatrix.L, sizeof(double)* sparseMatrix.L_size, cudaMemcpyHostToDevice); + cudaMemcpy(d_U, sparseMatrix.U, sizeof(double)* sparseMatrix.U_size, cudaMemcpyHostToDevice); cudaMemcpy(d_do_aik, solver.do_aik, sizeof(bool)* solver.do_aik_size, cudaMemcpyHostToDevice); cudaMemcpy(d_aik, solver.aik, sizeof(size_t)* solver.aik_size, cudaMemcpyHostToDevice); cudaMemcpy(d_do_aki, solver.do_aki, sizeof(bool)* solver.do_aki_size, cudaMemcpyHostToDevice); @@ -63,8 +68,7 @@ namespace micm{ cudaMemcpy(&(device->aki),&d_aki, sizeof(size_t*), cudaMemcpyHostToDevice); cudaMemcpy(&(device->uii), &d_uii, sizeof(size_t*), cudaMemcpyHostToDevice); - - + size_t num_block = (sparseMatrix.A_size + BLOCK_SIZE - 1) / BLOCK_SIZE; }