From 2b4762f02af2ed136134c7f0570646219753ab3e Mon Sep 17 00:00:00 2001
From: Matthew Martineau
Date: Wed, 25 Oct 2023 12:57:50 +0100
Subject: [PATCH] Fix final set of warnings ready for v2.4.0 (#274)

* Fixed missing detail in changelog and fixed warning.

* Fixed unreachable code warning.

* Tweaking changelog.

* Fixed some unused variable warnings.

* Fixed ifdef for USE_CUDAMALLOCASYNC
---
 CHANGELOG                                      | 14 +++++++++++++-
 examples/amgx_mpi_capi_cla.c                   |  4 ++--
 include/global_thread_handle.h                 |  2 +-
 src/aggregation/aggregation_amg_level.cu       |  6 +-----
 src/distributed/comms_mpi_hostbuffer_stream.cu |  8 ++++----
 src/global_thread_handle.cu                    |  6 +++---
 src/matrix.cu                                  |  1 -
 7 files changed, 24 insertions(+), 17 deletions(-)

diff --git a/CHANGELOG b/CHANGELOG
index f3d3f16f..5da7a1ef 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -31,6 +31,18 @@ Changes:
 - Fixed issue with exact_coarse_solve grid sizing
 - Fixed issue with use_sum_stopping_criteria
 - Fixed SIGFPE that could occur when the initial norm is 0
+- Added a new API call AMGX_matrix_check_symmetry, that tests if a matrix is structurally or completely symmetric
+
+Tested configurations:
+
+Linux x86-64:
+-- Ubuntu 20.04, Ubuntu 22.04
+-- NVHPC 23.7, GCC 9.4.0, GCC 12.1
+-- OpenMPI 4.0.x
+-- CUDA 11.2, 11.8, 12.2
+-- A100, H100
+
+Note that while AMGX has support for building in Windows, testing on Windows is very limited.
 
 ===============================================================
 
@@ -103,4 +115,4 @@ v2.0.0 - 2017.10.17
 
 ---------------------------------------------------------------
 
-Initial open source release
\ No newline at end of file
+Initial open source release
diff --git a/examples/amgx_mpi_capi_cla.c b/examples/amgx_mpi_capi_cla.c
index edb81130..f76de616 100644
--- a/examples/amgx_mpi_capi_cla.c
+++ b/examples/amgx_mpi_capi_cla.c
@@ -166,8 +166,8 @@ int main(int argc, char **argv)
     int major, minor;
     char *ver, *date, *time;
     //input matrix and rhs/solution
-    int n, nnz, block_dimx, block_dimy, block_size, num_neighbors;
-    int *row_ptrs = NULL, *neighbors = NULL;
+    int n, nnz, block_dimx, block_dimy, block_size;
+    int *row_ptrs = NULL;
     void *col_indices = NULL;
     void *values = NULL, *diag = NULL, *dh_x = NULL, *dh_b = NULL;
     int *h_row_ptrs = NULL;
diff --git a/include/global_thread_handle.h b/include/global_thread_handle.h
index 3b71875c..fa053fe4 100644
--- a/include/global_thread_handle.h
+++ b/include/global_thread_handle.h
@@ -141,7 +141,7 @@ class MemoryPool
     //Mutex added to fix ICE threadsafe issue
    	std::mutex m_mutex2;
 
-#ifndef USE_CUDAMALLOCASYNC
+#ifdef USE_CUDAMALLOCASYNC
     cudaMemPool_t m_mem_pool;
 #endif
 
diff --git a/src/aggregation/aggregation_amg_level.cu b/src/aggregation/aggregation_amg_level.cu
index f93a2b40..e5984eb2 100644
--- a/src/aggregation/aggregation_amg_level.cu
+++ b/src/aggregation/aggregation_amg_level.cu
@@ -2386,11 +2386,7 @@ void Aggregation_AMG_Level_Base<T_Config>::consolidateCoarseGridMatrix()
     Matrix<TConfig> &A = this->getA();
     Matrix<TConfig> &Ac = this->getNextLevel( MemorySpace( ) )->getA();
 
-    int num_parts, num_fine_neighbors, my_id;
-
-    num_parts = A.manager->getComms()->get_num_partitions();
-    num_fine_neighbors = A.manager->neighbors.size();
-    my_id = A.manager->global_id();
+    int my_id = A.manager->global_id();
 
     IVector_h &destination_part = A.manager->getDestinationPartitions();
     int my_destination_part = A.manager->getMyDestinationPartition();
diff --git a/src/distributed/comms_mpi_hostbuffer_stream.cu b/src/distributed/comms_mpi_hostbuffer_stream.cu
index 02fd38c6..da877bc8 100644
--- a/src/distributed/comms_mpi_hostbuffer_stream.cu
+++ b/src/distributed/comms_mpi_hostbuffer_stream.cu
@@ -1427,25 +1427,25 @@ void CommsMPIHostBufferStream<T_Config>::recv_vector_wait_all(HZVector &a) { rec
 template <class T_Config>
 int CommsMPIHostBufferStream<T_Config>::get_num_partitions()
 {
-    int total = 0;
 #ifdef AMGX_WITH_MPI
+    int total = 0;
     MPI_Comm_size( mpi_comm, &total );
+    return total;
 #else
     FatalError("MPI Comms module requires compiling with MPI", AMGX_ERR_NOT_IMPLEMENTED);
 #endif
-    return total;
 }
 
 template <class T_Config>
 int CommsMPIHostBufferStream<T_Config>::get_global_id()
 {
-    int rank = 0;
 #ifdef AMGX_WITH_MPI
+    int rank = 0;
     MPI_Comm_rank( mpi_comm, &rank);
+    return rank;
 #else
     FatalError("MPI Comms module requires compiling with MPI", AMGX_ERR_NOT_IMPLEMENTED);
 #endif
-    return rank;
 }
 
 
diff --git a/src/global_thread_handle.cu b/src/global_thread_handle.cu
index 326645f5..a64f9cb4 100644
--- a/src/global_thread_handle.cu
+++ b/src/global_thread_handle.cu
@@ -83,7 +83,7 @@ MemoryPool::MemoryPool(size_t max_block_size, size_t page_size, size_t max_size)
 {
     //initializeCriticalSection(&m_mutex2);
 
-#ifndef USE_CUDAMALLOCASYNC
+#ifdef USE_CUDAMALLOCASYNC
     int device;
     cudaGetDevice(&device);
     cudaDeviceGetMemPool(&m_mem_pool, device);
@@ -846,7 +846,7 @@ cudaError_t cudaFreeHost(void *ptr)
 
 cudaError_t cudaMallocAsync(void **ptr, size_t size, cudaStream_t stream)
 {
-#ifndef USE_CUDAMALLOCASYNC
+#ifdef USE_CUDAMALLOCASYNC
 
     return ::cudaMallocAsync(ptr, size, stream);
 
@@ -961,7 +961,7 @@ cudaError_t cudaMallocAsync(void **ptr, size_t size, cudaStream_t stream)
 
 cudaError_t cudaFreeAsync(void *ptr, cudaStream_t stream)
 {
-#ifndef USE_CUDAMALLOCASYNC
+#ifdef USE_CUDAMALLOCASYNC
 
     return ::cudaFreeAsync(ptr, stream);
 
diff --git a/src/matrix.cu b/src/matrix.cu
index 9157151f..1a86e37f 100644
--- a/src/matrix.cu
+++ b/src/matrix.cu
@@ -415,7 +415,6 @@ Matrix< TemplateConfig >::print(char
     fprintf(fid, "%d %d %d\n", this->get_num_rows() * this->get_block_dimx(), this->get_num_cols() * this->get_block_dimy(), tnnz);
 
     auto trafI = [&](auto const &I, auto const &i) { return I * this->get_block_dimy() + i + 1; };
-    auto trafJ = [&](auto const &J, auto const &j) { return J * this->get_block_dimx() + j + 1; };
 
     for (i = printRowsStart; i < printRowsEnd; i++)
     {
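
For readers skimming the USE_CUDAMALLOCASYNC portion of the patch, the following standalone sketch (not part of the patch; the try_alloc/try_free helpers are hypothetical, only the CUDA runtime calls are real) illustrates the guard direction the patch establishes: the stream-ordered cudaMallocAsync/cudaFreeAsync path is compiled only when the macro is defined, with a plain cudaMalloc/cudaFree fallback otherwise.

// Hedged sketch, not AMGX code: try_alloc/try_free are illustrative helpers.
#include <cuda_runtime.h>
#include <cstdio>

static cudaError_t try_alloc(void **ptr, size_t bytes, cudaStream_t stream)
{
#ifdef USE_CUDAMALLOCASYNC
    // Stream-ordered path, compiled only when the feature macro is defined,
    // matching the corrected #ifdef in global_thread_handle.
    return ::cudaMallocAsync(ptr, bytes, stream);
#else
    (void)stream;                  // fallback ignores the stream
    return ::cudaMalloc(ptr, bytes);
#endif
}

static cudaError_t try_free(void *ptr, cudaStream_t stream)
{
#ifdef USE_CUDAMALLOCASYNC
    return ::cudaFreeAsync(ptr, stream);
#else
    (void)stream;
    return ::cudaFree(ptr);
#endif
}

int main()
{
    void *buf = nullptr;
    cudaStream_t stream;
    cudaStreamCreate(&stream);

    // Allocate and release 1 MiB through whichever path was compiled in.
    if (try_alloc(&buf, 1 << 20, stream) == cudaSuccess)
    {
        try_free(buf, stream);
    }

    cudaStreamSynchronize(stream);
    cudaStreamDestroy(stream);
    std::printf("done\n");
    return 0;
}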