compiling test

NCAR · Sep 2, 2023 · 9779db7 · 9779db7
1 parent b083012
commit 9779db7
Show file tree

Hide file tree

Showing 4 changed files with 40 additions and 27 deletions.
diff --git a/include/micm/solver/cuda_lu_decomposition.hpp b/include/micm/solver/cuda_lu_decomposition.hpp
@@ -2,6 +2,7 @@
 #include<micm/solver/lu_decomposition.hpp>
 #include<micm/util/cuda_param.hpp>
 #include<thrust/device_vector.h> 
+#include<thrust/pair.h>
 #ifdef USE_CUDA
 #include <micm/solver/cuda_de_composition.cuh>
 #endif 
@@ -105,13 +106,13 @@ namespace micm{
         SparseMatrixPolicy<T>& L, 
         SparseMatrixPolicy<T>& U) const
     {
-        CUDAMatrixParam matrix; 
-        matrix.A = A.AsVector().data(); 
-        matrix.A_size = A.AsVector().size(); 
-        matrix.L = L.AsVector().data(); 
-        matrix.L_size = L.AsVector().size(); 
-        matrix.U = U.AsVector().data(); 
-        matrix.U_size = U.AsVector().size(); 
+        CUDASparseMatrix sparseMatrix; 
+        sparseMatrix.A = A.AsVector().data(); 
+        sparseMatrix.A_size = A.AsVector().size(); 
+        sparseMatrix.L = L.AsVector().data(); 
+        sparseMatrix.L_size = L.AsVector().size(); 
+        sparseMatrix.U = U.AsVector().data(); 
+        sparseMatrix.U_size = U.AsVector().size(); 
 
         CUDASolverParam solver; 
         solver.d_niLU.resize(niLU_.size()); 
@@ -132,10 +133,12 @@ namespace micm{
         solver.do_aki_size = do_aki_.size(); 
         solver.aki = aki_.data(); 
         solver.aki_size = aki_.size(); 
-        solver.uii = uii.data(); 
+        solver.uii = uii_.data(); 
+        solver.uii_size = uii_.size(); 
 
+
         //calling kernelSetup function
-        DecomposeKernelDriver(matrix, solver); 
+        DecomposeKernelDriver(sparseMatrix, solver); 
     }
 
 

diff --git a/include/micm/solver/lu_decomposition.hpp b/include/micm/solver/lu_decomposition.hpp
@@ -2,7 +2,6 @@
 // SPDX-License-Identifier: Apache-2.0
 
 #pragma once
-
 #include <micm/util/sparse_matrix.hpp>
 
 namespace micm

diff --git a/include/micm/util/cuda_param.hpp b/include/micm/util/cuda_param.hpp
@@ -1,4 +1,5 @@
 #include<thrust/device_vector.h> 
+#include <thrust/pair.h>
 #ifndef CUDA_PARAM_HPP
 #define CUDA_PARAM_HPP
   //member data of class CUDAProcessSet grouped in struct passing to kernel driver function 
@@ -16,11 +17,11 @@
 }; 
 
   struct CUDASolverParam{
-      thrust::device_vector d_niLU<thrust::pair<size_t,size_t>>;
-      thrust::device_vector d_uik_nkj<thrust::pair<size_t,size_t>>; 
-      thrust::device_vector d_lij_ujk<thrust::pair<size_t, size_t>>;
-      thrust::device_vector d_lki_nkj<thrust::pair<size_t, size_t>>; 
-      thrust::device_vector d_lkj_uji<thrust::pair<size_t, size_t>>;
+      thrust::device_vector<thrust::pair<size_t,size_t>> d_niLU;
+      thrust::device_vector<thrust::pair<size_t,size_t>> d_uik_nkj; 
+      thrust::device_vector<thrust::pair<size_t, size_t>> d_lij_ujk;
+      thrust::device_vector<thrust::pair<size_t, size_t>> d_lki_nkj; 
+      thrust::device_vector<thrust::pair<size_t, size_t>> d_lkj_uji;
       const bool* do_aik;
       size_t do_aik_size; 
       const size_t* aik;
@@ -45,5 +46,11 @@
 struct CUDASparseMatrixParam{
    double* jacobian; 
    size_t jacobian_size; 
+   const double* A; 
+   size_t A_size; 
+   double* L; 
+   size_t L_size; 
+   double* U;
+   size_t U_size; 
 };
 #endif
diff --git a/src/solver/lu_decomposition.cu b/src/solver/lu_decomposition.cu
@@ -1,7 +1,8 @@
 #include <iostream> 
 #include <micm/util/cuda_param> 
+#include<thrust/device_vector.h> 
+#include <thrust/pair.h>
 const BLOCK_SIZE = 320; 
-
 struct decomposeDevice{
     double* A; 
     double* L; 
@@ -17,12 +18,16 @@ struct decomposeDevice{
 
 namespace micm{
     namespace cuda{
-        __global__ void Decompose_kernel(){
+        // __global__ void Decompose_kernel(
+        //     decomposeDevice& device, 
+        //     thrust::device_vector d_niLU<thrust::pair<size_t,size_t>>;
+        // )
+        // {
 
-        }
+        // }
 
         void DecomposeKernelDriver(
-            CUDAMatrixParam& matrix, 
+            CUDAMatrixParam& sparseMatrix, 
             CUDASolverParam& solver){
 
             //create device pointers and allocate device memory 
@@ -36,9 +41,9 @@ namespace micm{
             size_t* d_uii; 
             decomposeDevice* device; 
 
-            cudaMalloc(&d_A, sizeof(double)* matrix.A_size); 
-            cudaMalloc(&d_L, sizeof(double)* matrix.L_size); 
-            cudaMalloc(&d_U, sizeof(double)* matrix.U_size); 
+            cudaMalloc(&d_A, sizeof(double)* sparseMatrix.A_size); 
+            cudaMalloc(&d_L, sizeof(double)* sparseMatrix.L_size); 
+            cudaMalloc(&d_U, sizeof(double)* sparseMatrix.U_size); 
             cudaMalloc(&d_do_aik, sizeof(bool)* solver.do_aik_size); 
             cudaMalloc(&d_aik, sizeof(size_t)* solver.aik_size); 
             cudaMalloc(&d_do_aki, sizeof(bool)* solver.do_aki_size); 
@@ -47,9 +52,9 @@ namespace micm{
             cudaMalloc(&device, sizeof(decomposeDevice)); 
 
             //transfer data from host to device 
-            cudaMemcpy(d_A, matrix.A, sizeof(double)* matrix.A_size, cudaMemcpyHostToDevice); 
-            cudaMemcpy(d_L, matrix.L, sizeof(double)* matrix.L_size, cudaMemcpyHostToDevice); 
-            cudaMemcpy(d_U, matrix.U, sizeof(double)* matrix.U_size, cudaMemcpyHostToDevice); 
+            cudaMemcpy(d_A, sparseMatrix.A, sizeof(double)* sparseMatrix.A_size, cudaMemcpyHostToDevice); 
+            cudaMemcpy(d_L, sparseMatrix.L, sizeof(double)* sparseMatrix.L_size, cudaMemcpyHostToDevice); 
+            cudaMemcpy(d_U, sparseMatrix.U, sizeof(double)* sparseMatrix.U_size, cudaMemcpyHostToDevice); 
             cudaMemcpy(d_do_aik, solver.do_aik, sizeof(bool)* solver.do_aik_size, cudaMemcpyHostToDevice); 
             cudaMemcpy(d_aik, solver.aik, sizeof(size_t)* solver.aik_size, cudaMemcpyHostToDevice); 
             cudaMemcpy(d_do_aki, solver.do_aki, sizeof(bool)* solver.do_aki_size, cudaMemcpyHostToDevice); 
@@ -63,8 +68,7 @@ namespace micm{
             cudaMemcpy(&(device->aki),&d_aki, sizeof(size_t*), cudaMemcpyHostToDevice); 
             cudaMemcpy(&(device->uii), &d_uii, sizeof(size_t*), cudaMemcpyHostToDevice);
 
-
-
+            size_t num_block = (sparseMatrix.A_size + BLOCK_SIZE - 1) / BLOCK_SIZE;
 
             }