From 937c2896850298fd6004ad948a2ede9dd32794d7 Mon Sep 17 00:00:00 2001 From: Hiyu Shintani Date: Thu, 20 Jun 2019 16:01:36 +0200 Subject: [PATCH 1/2] Array2d / Sparse2d interop --- lib/include/tick/array/array2d.h | 78 ++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) diff --git a/lib/include/tick/array/array2d.h b/lib/include/tick/array/array2d.h index 641ab80fd..e5abb5bfa 100644 --- a/lib/include/tick/array/array2d.h +++ b/lib/include/tick/array/array2d.h @@ -13,6 +13,9 @@ class SArray2d; template class SparseArray2d; +template +class SSparseArray2d; + /*! \class Array2d * \brief Template class for basic non sparse 2d arrays of type `T`. * @@ -51,6 +54,81 @@ class Array2d : public BaseArray2d { using AbstractArray1d2d::is_sparse; using AbstractArray1d2d::init_to_zero; + // implement this properly + explicit Array2d(std::vector> data) { + allocVector2D_Data(data); + } + + Array2d& operator=(std::vector> data) { + allocVector2D_Data(data); + return *this; + } + + void allocVector2D_Data(std::vector> data) { + if (data.size() == 0) { + TICK_ERROR("data empty"); + return; + } + + if (is_data_allocation_owned) + TICK_PYTHON_FREE(_data); + + _n_cols = data[0].size(); + _n_rows = data.size(); + _size = _n_cols * _n_rows; + + is_data_allocation_owned = true; + TICK_PYTHON_MALLOC(_data, T, _size); + + ulong index = 0; + for (std::vector vec : data) { + if (vec.size() != _n_cols) + TICK_ERROR("non consistent column length"); + memcpy(_data + index, vec.data(), _n_cols * sizeof(T)); + index+=_n_cols * sizeof(T) / sizeof(void*); + } + } + + std::shared_ptr> toSSparseArray2dPtr() { + std::vector data; + std::vector row_idx(_n_rows + 1); + std::vector col; + + ulong nnz = 0; + + row_idx[0] = 0; + for (uint r = 0; r < _n_rows; r++) { + int nnz_row = 0; + for (uint c = 0; c < _n_cols; c++) { + if (operator()(r, c) != (T)0) { + T val = operator()(r, c); + nnz++; + nnz_row++; + data.push_back(val); + col.push_back(c); + } + } + row_idx[r + 1] = row_idx[r] + nnz_row; + } + + uint* row_ptr = new uint[row_idx.size()]; + uint* col_ptr = new uint[col.size()]; + T* data_ptr = new T[data.size()]; + + memcpy(row_ptr, row_idx.data(), row_idx.size() * sizeof(uint)); + memcpy(col_ptr, col.data(), col.size() * sizeof(uint)); + memcpy(data_ptr, data.data(), data.size() * sizeof(T)); + + std::shared_ptr> arrayptr = + SSparseArray2d::new_ptr(0, 0, 0); + + arrayptr->set_data_indices_rowindices(data_ptr, col_ptr, row_ptr, _n_rows, _n_cols); + return arrayptr; + } + + + // + //! @brief Constructor for an empty array. Array2d() : BaseArray2d(true) {} From 4e3d77d81ecbe0bc6f6e247890c77c729fdc0529 Mon Sep 17 00:00:00 2001 From: dekken Date: Sat, 6 Jul 2019 15:57:52 +0200 Subject: [PATCH 2/2] Random Sparse2d / Dense to sparse --- lib/cpp-test/array/array_gtest.cpp | 13 ++++- lib/cpp-test/array/atomic_array_gtest.cpp | 2 +- lib/cpp-test/array/linear_system_gtest.cpp | 2 +- lib/include/tick/array/array2d.h | 34 ++++++++++++ lib/include/tick/array/sparse2d/random2d.h | 62 ++++++++++++++++++++++ lib/include/tick/array/sparsearray2d.h | 4 ++ 6 files changed, 113 insertions(+), 4 deletions(-) create mode 100644 lib/include/tick/array/sparse2d/random2d.h diff --git a/lib/cpp-test/array/array_gtest.cpp b/lib/cpp-test/array/array_gtest.cpp index 0b0cd2656..8ca957be5 100644 --- a/lib/cpp-test/array/array_gtest.cpp +++ b/lib/cpp-test/array/array_gtest.cpp @@ -37,7 +37,7 @@ class ArrayTest : public ::testing::Test { typedef ::testing::Types MyArrayTypes; -TYPED_TEST_CASE(ArrayTest, MyArrayTypes); +TYPED_TEST_SUITE(ArrayTest, MyArrayTypes); template class Array2dTest : public ::testing::Test { @@ -49,7 +49,7 @@ typedef ::testing::Types MyArray2dTypes; -TYPED_TEST_CASE(Array2dTest, MyArray2dTypes); +TYPED_TEST_SUITE(Array2dTest, MyArray2dTypes); TYPED_TEST(ArrayTest, InitToZero) { TypeParam arr{TICK_TEST_DATA_SIZE}; @@ -626,6 +626,15 @@ TYPED_TEST(Array2dTest, SerializationBinary) { TypeParam>(); } +TEST(SparseTesting, RandomSparse2d) { + auto random_sparse = SparseArray2d::RANDOM(100, 100, .33); + auto dense = random_sparse->as_array2d(); + auto sparse = dense.as_sparsearray2d(); + ASSERT_EQ(random_sparse->size(), sparse->size()); + ASSERT_EQ(random_sparse->n_rows(), sparse->n_rows()); + ASSERT_EQ(random_sparse->n_cols(), sparse->n_cols()); +} + #ifdef ADD_MAIN int main(int argc, char **argv) { ::testing::InitGoogleTest(&argc, argv); diff --git a/lib/cpp-test/array/atomic_array_gtest.cpp b/lib/cpp-test/array/atomic_array_gtest.cpp index c446c13c3..c2b173945 100644 --- a/lib/cpp-test/array/atomic_array_gtest.cpp +++ b/lib/cpp-test/array/atomic_array_gtest.cpp @@ -163,7 +163,7 @@ class AtomicArrayTest : public ::testing::Test { }; typedef ::testing::Types, Array> MyArrayTypes; -TYPED_TEST_CASE(AtomicArrayTest, MyArrayTypes); +TYPED_TEST_SUITE(AtomicArrayTest, MyArrayTypes); TYPED_TEST(AtomicArrayTest, InitToZero) { TypeParam arr{TICK_TEST_DATA_SIZE}; diff --git a/lib/cpp-test/array/linear_system_gtest.cpp b/lib/cpp-test/array/linear_system_gtest.cpp index dff6f7c62..5d9926fe3 100644 --- a/lib/cpp-test/array/linear_system_gtest.cpp +++ b/lib/cpp-test/array/linear_system_gtest.cpp @@ -25,7 +25,7 @@ class LinearSystemTest : public ::testing::Test { }; typedef ::testing::Types MyArrayTypes; -TYPED_TEST_CASE(LinearSystemTest, MyArrayTypes); +TYPED_TEST_SUITE(LinearSystemTest, MyArrayTypes); diff --git a/lib/include/tick/array/array2d.h b/lib/include/tick/array/array2d.h index e5abb5bfa..7d4a8ed54 100644 --- a/lib/include/tick/array/array2d.h +++ b/lib/include/tick/array/array2d.h @@ -270,6 +270,8 @@ class Array2d : public BaseArray2d { // The definition is in the file sarray.h std::shared_ptr> as_sarray2d_ptr(); + std::shared_ptr> as_sparsearray2d() const; + public: bool compare(const Array2d& that) const { bool are_equal = BaseArray2d::compare(that); @@ -562,4 +564,36 @@ inline std::ostream &operator<<(std::ostream &s, const std::vector &p) { return s << typeid(p).name() << "<" << typeid(T).name() << ">"; } +template +std::shared_ptr> Array2d::as_sparsearray2d() const { + T zero {0}; + auto this_data = this->data(); + size_t _n_rows = this->n_rows(), _n_cols = this->n_cols(), nnz = 0, size = 0; + for (size_t r = 0; r < _n_rows; r++) { + for (size_t c = 0; c < _n_cols; c++) { + T val {0}; + if ((val = this_data[(r * _n_cols) + c]) != zero) size++; + } + } + auto sparse = SSparseArray2d::new_ptr(_n_rows, _n_cols, size); + auto *data = sparse->data(); + auto *indices = sparse->indices(); + auto *row_indices = sparse->row_indices(); + row_indices[0] = 0; + for (size_t r = 0; r < _n_rows; r++) { + size_t nnz_row = 0; + for (size_t c = 0; c < _n_cols; c++) { + T val {0}; + if ((val = this_data[(r * _n_cols) + c]) != zero) { + data[nnz] = val; + indices[nnz] = c; + nnz++; + nnz_row++; + } + } + row_indices[r + 1] = row_indices[r] + nnz_row; + } + return sparse; +} + #endif // LIB_INCLUDE_TICK_ARRAY_ARRAY2D_H_ diff --git a/lib/include/tick/array/sparse2d/random2d.h b/lib/include/tick/array/sparse2d/random2d.h new file mode 100644 index 000000000..70d7c7957 --- /dev/null +++ b/lib/include/tick/array/sparse2d/random2d.h @@ -0,0 +1,62 @@ + +#ifndef LIB_INCLUDE_TICK_ARRAY_SPARSE2D_RANDOM2D_H_ +#define LIB_INCLUDE_TICK_ARRAY_SPARSE2D_RANDOM2D_H_ + +#include + +template +std::shared_ptr> SparseArray2d::RANDOM(size_t rows, size_t cols, T density, T seed) { + if (density < 0 || density > 1) + throw std::runtime_error("Invalid sparse density, must be between 0 and 1"); + + size_t size = std::floor(rows * cols * density); + auto arr = SSparseArray2d::new_ptr(rows, cols, size); + + std::mt19937_64 generator; + if (seed > 0) { + generator = std::mt19937_64(seed); + } else { + std::random_device r; + std::seed_seq seed_seq{r(), r(), r(), r(), r(), r(), r(), r()}; + generator = std::mt19937_64(seed_seq); + } + std::uniform_real_distribution dist; + auto data = arr->data(); + for (size_t i = 0; i < size; i++) data[i] = dist(generator); + + size_t nnz = size; + std::vector nnz_row(rows, 0); + + size_t index = 0; + while (nnz > 0) { + std::uniform_int_distribution dist_int(1, 100); // to do 50 50 + if (dist_int(generator) > 50) { + nnz_row[index]++; + nnz--; + } + index++; + if (index >= rows) index = 0; + } + + index = 0; + auto indices = arr->indices(); + for (size_t i : nnz_row) { + std::vector indice_comb; + for (size_t j = 0; j < cols; j++) indice_comb.emplace_back(j); + std::shuffle(indice_comb.begin(), indice_comb.end(), generator); + for (size_t j = 0; j < i; j++) { + indices[index++] = indice_comb[j]; + } + } + + // if (index != arr->indices().size() - 1) + // std::runtime_error("Uh something is wrong"); + + auto row_indices = arr->row_indices(); + row_indices[0] = 0; + for (size_t i = 1; i < rows + 1; i++) row_indices[i] = row_indices[i - 1] + nnz_row[i - 1]; + + return arr; + } + +#endif // LIB_INCLUDE_TICK_ARRAY_SPARSE2D_RANDOM2D_H_ diff --git a/lib/include/tick/array/sparsearray2d.h b/lib/include/tick/array/sparsearray2d.h index a54bdfe99..14011b595 100644 --- a/lib/include/tick/array/sparsearray2d.h +++ b/lib/include/tick/array/sparsearray2d.h @@ -103,6 +103,8 @@ class SparseArray2d : public BaseArray2d { //! called on a view std::shared_ptr> as_ssparsearray2d_ptr(); + static std::shared_ptr> RANDOM(size_t rows, size_t cols, T density, T seed = -1); + template void save(Archive &ar) const { ar(this->_size_sparse); @@ -277,4 +279,6 @@ SPARSE_ARRAY2D_DEFINE_TYPE(ulong, ULong); * @} */ +#include "tick/array/sparse2d/random2d.h" + #endif // LIB_INCLUDE_TICK_ARRAY_SPARSEARRAY2D_H_