Route choice refactor (#547)

* avoids division by zero erro * avoids division by zero erro * fixes tests * Prepares None for ODs with real flows only * Add detection of zero cost paths * Improve error reporting * Beginning of the refactor, add RouteChoiceSetResults class * Move compute_cost and compute_mask over to new class * Add remaining compute functions, make them const-correct * Add table construction, some docs, and link loading * Hack the batched method up to begin testing * WIP [select] link loading * Add COO demand holder * Work on adding COO demand format * Port tests to new demand system * Add start of LinkLoadingResults class * Fix indexing issue * Add link loading support * Disable bounds checking and initialised check, not relevant here * Remove old code * Fixes compilation on Windows. Should work in all systems tesseract-ocr/tesseract#3285 * Small bug and typo * Improves DF build from matrix * Improves DF build from matrix * Bug fix * Sorts results and ensures no repeated columns in the demand cube * Manual merge of select link AND sets * Select link support * Begin updating tests * Use smart pointers for COO * Add select link OD matrix support * Fix segfault, early compute results and cache them * Update tests, fix tests, fix memory error, add shape parameter * Update docs * Skip unimplemented tests, remove unused args * Fix sparse test * Move docs over to RouteChoice object instead of RouteChoiceSet * .A -> .toarray(), must be an old function * fixup! .A -> .toarray(), must be an old function * move * Split apart route choice module * Factor out some common args in setup.py, add cython-lint line length --------- Co-authored-by: pveigadecamargo <pveigadecamargo@anl.gov>
AequilibraE · Jul 26, 2024 · 313b6ea · 313b6ea
1 parent 11e6307
commit 313b6ea
Show file tree

Hide file tree

Showing 31 changed files with 2,899 additions and 1,982 deletions.
diff --git a/aequilibrae/matrix/sparse_matrix.pxd b/aequilibrae/matrix/sparse_matrix.pxd
@@ -1,13 +1,43 @@
 from libcpp.vector cimport vector
+from libcpp.memory cimport unique_ptr, make_unique
+from libcpp cimport bool
 
 cdef class Sparse:
     pass
 
+cdef struct COO_f64_struct:
+    unique_ptr[vector[size_t]] row
+    unique_ptr[vector[size_t]] col
+    unique_ptr[vector[double]] f64_data
+
+cdef struct COO_f32_struct:
+    unique_ptr[vector[size_t]] row
+    unique_ptr[vector[size_t]] col
+    unique_ptr[vector[float]] f32_data
+
 cdef class COO(Sparse):
     cdef:
-        vector[size_t] *row
-        vector[size_t] *col
-        vector[double] *data
-        readonly object shape
+        unique_ptr[vector[size_t]] row
+        unique_ptr[vector[size_t]] col
+        bool f64
+        unique_ptr[vector[double]] f64_data
+        unique_ptr[vector[float]] f32_data
+        public object shape
+
+    cdef void f64_append(COO self, size_t i, size_t j, double v) noexcept nogil
+    cdef void f32_append(COO self, size_t i, size_t j, float v) noexcept nogil
+
+    @staticmethod
+    cdef void init_f64_struct(COO_f64_struct &struct) noexcept nogil
+    @staticmethod
+    cdef void init_f32_struct(COO_f32_struct &struct) noexcept nogil
+
+    @staticmethod
+    cdef object from_f64_struct(COO_f64_struct &struct)
+    @staticmethod
+    cdef object from_f32_struct(COO_f32_struct &struct)
 
-    cdef void append(COO self, size_t i, size_t j, double v) noexcept nogil
+    @staticmethod
+    cdef void f64_struct_append(COO_f64_struct &struct, size_t i, size_t j, double v) noexcept nogil
+    @staticmethod
+    cdef void f32_struct_append(COO_f32_struct &struct, size_t i, size_t j, float v) noexcept nogil
diff --git a/aequilibrae/matrix/sparse_matrix.pyx b/aequilibrae/matrix/sparse_matrix.pyx
@@ -1,10 +1,12 @@
 from libcpp.vector cimport vector
+from libcpp.utility cimport move
 from libcpp cimport nullptr
 from cython.operator cimport dereference as d
 
 import scipy.sparse
 import numpy as np
 import openmatrix as omx
+import cython
 
 cdef class Sparse:
     """
@@ -57,66 +59,130 @@ cdef class COO(Sparse):
     A class to implement sparse matrix operations such as reading, writing, and indexing
     """
 
-    def __cinit__(self):
+    def __cinit__(self, shape=None, f64: bool = True):
         """C level init. For C memory allocation and initialisation. Called exactly once per object."""
+        self.shape = shape
+        self.f64 = f64
 
-        self.row = new vector[size_t]()
-        self.col = new vector[size_t]()
-        self.data = new vector[double]()
+        self.row = make_unique[vector[size_t]]()
+        self.col = make_unique[vector[size_t]]()
 
-    def __init__(self, shape=None):
-        """Python level init, may be called multiple times, for things that can't be done in __cinit__."""
+        if self.f64:
+            self.f64_data = make_unique[vector[double]]()
+        else:
+            self.f32_data = make_unique[vector[float]]()
 
-        self.shape = shape
+    def __init__(self, *args, **kwargs):
+        pass
 
-    def __dealloc__(self):
+    def to_scipy(self, shape=None):
         """
-        C level deallocation. For freeing memory allocated by this object. *Must* have GIL, `self` may be in a
-        partially deallocated state already.
+        Create scipy.sparse.coo_matrix from this COO matrix.
         """
+        cdef:
+            size_t[:] row, col
+            double[:] f64_data
+            float[:] f32_data
+            size_t length
 
-        del self.row
-        self.row = <vector[size_t] *>nullptr
-
-        del self.col
-        self.col = <vector[size_t] *>nullptr
+        # We can't construct a 0x0 matrix from 3x 0-sized arrays but we can tell scipy to make one.
+        if not d(self.row).size() or not d(self.col).size():
+            return scipy.sparse.coo_matrix((0, 0))
 
-        del self.data
-        self.data = <vector[double] *>nullptr
+        length = d(self.row).size()
+        row = <size_t[:length]>d(self.row).data()
 
-    def to_scipy(self, shape=None, dtype=np.float64):
-        """
-        Create scipy.sparse.coo_matrix from this COO matrix.
-        """
-        row = <size_t[:self.row.size()]>&d(self.row)[0]
-        col = <size_t[:self.col.size()]>&d(self.col)[0]
-        data = <double[:self.data.size()]>&d(self.data)[0]
+        length = d(self.col).size()
+        col = <size_t[:length]>d(self.col).data()
 
         if shape is None:
             shape = self.shape
 
-        return scipy.sparse.coo_matrix((data, (row, col)), dtype=dtype, shape=shape)
+        if self.f64:
+            length = d(self.f64_data).size()
+            f64_data = <double[:length]>d(self.f64_data).data()
+            return scipy.sparse.coo_matrix((f64_data, (row, col)), dtype=np.float64, shape=shape, copy=True)
+        else:
+            length = d(self.f32_data).size()
+            f32_data = <float[:length]>d(self.f32_data).data()
+            return scipy.sparse.coo_matrix((f32_data, (row, col)), dtype=np.float32, shape=shape, copy=True)
 
     @classmethod
     def from_matrix(cls, m):
         """
         Create COO matrix from an dense or scipy-like matrix.
         """
         if not isinstance(m, scipy.sparse.coo_matrix):
-            m = scipy.sparse.coo_matrix(m)
+            m = scipy.sparse.coo_matrix(m, dtype=m.dtype)
+
+        self = <COO?>cls(f64=m.data.dtype == "float64")
+
+        cdef size_t[:] row = m.row.astype(np.uint64), col = m.col.astype(np.uint64)
+        cdef double[:] f64_data
+        cdef float[:] f32_data
 
-        self = <COO?>cls()
+        d(self.row).insert(d(self.row).begin(), &row[0], &row[-1] + 1)
+        d(self.col).insert(d(self.col).begin(), &col[0], &col[-1] + 1)
 
-        cdef size_t[:] row = m.row.astype(np.uint64), col = m.row.astype(np.uint64)
-        cdef double[:] data = m.data
+        if self.f64:
+            f64_data = m.data
+            d(self.f64_data).insert(d(self.f64_data).begin(), &f64_data[0], &f64_data[-1] + 1)
+        else:
+            f32_data = m.data
+            d(self.f32_data).insert(d(self.f32_data).begin(), &f32_data[0], &f32_data[-1] + 1)
+
+        return self
+
+    @staticmethod
+    cdef void init_f64_struct(COO_f64_struct &struct) noexcept nogil:
+        struct.row = make_unique[vector[size_t]]()
+        struct.col = make_unique[vector[size_t]]()
+        struct.f64_data = make_unique[vector[double]]()
+
+    @staticmethod
+    cdef void init_f32_struct(COO_f32_struct &struct) noexcept nogil:
+        struct.row = make_unique[vector[size_t]]()
+        struct.col = make_unique[vector[size_t]]()
+        struct.f32_data = make_unique[vector[float]]()
+
+    @staticmethod
+    cdef object from_f64_struct(COO_f64_struct &struct):
+        cdef COO self = COO.__new__(COO)
+        self.row = move(struct.row)
+        self.col = move(struct.col)
+        self.f64 = True
+        self.f64_data = move(struct.f64_data)
+
+        return self
 
-        self.row.insert(self.row.end(), &row[0], &row[-1] + 1)
-        self.col.insert(self.col.end(), &col[0], &col[-1] + 1)
-        self.data.insert(self.data.end(), &data[0], &data[-1] + 1)
+    @staticmethod
+    cdef object from_f32_struct(COO_f32_struct &struct):
+        cdef COO self = COO.__new__(COO)
+        self.row = move(struct.row)
+        self.col = move(struct.col)
+        self.f64 = False
+        self.f32_data = move(struct.f32_data)
 
         return self
 
-    cdef void append(COO self, size_t i, size_t j, double v) noexcept nogil:
-        self.row.push_back(i)
-        self.col.push_back(j)
-        self.data.push_back(v)
+    cdef void f64_append(COO self, size_t i, size_t j, double v) noexcept nogil:
+        d(self.row).push_back(i)
+        d(self.col).push_back(j)
+        d(self.f64_data).push_back(v)
+
+    cdef void f32_append(COO self, size_t i, size_t j, float v) noexcept nogil:
+        d(self.row).push_back(i)
+        d(self.col).push_back(j)
+        d(self.f32_data).push_back(v)
+
+    @staticmethod
+    cdef void f64_struct_append(COO_f64_struct &struct, size_t i, size_t j, double v) noexcept nogil:
+        d(struct.row).push_back(i)
+        d(struct.col).push_back(j)
+        d(struct.f64_data).push_back(v)
+
+    @staticmethod
+    cdef void f32_struct_append(COO_f32_struct &struct, size_t i, size_t j, float v) noexcept nogil:
+        d(struct.row).push_back(i)
+        d(struct.col).push_back(j)
+        d(struct.f32_data).push_back(v)
diff --git a/aequilibrae/paths/AoN.pyx → aequilibrae/paths/cython/AoN.pyx b/aequilibrae/paths/AoN.pyx → aequilibrae/paths/cython/AoN.pyx
diff --git a/aequilibrae/paths/basic_path_finding.pyx → ...ibrae/paths/cython/basic_path_finding.pyx b/aequilibrae/paths/basic_path_finding.pyx → ...ibrae/paths/cython/basic_path_finding.pyx
diff --git a/aequilibrae/paths/bpr.pyx → aequilibrae/paths/cython/bpr.pyx b/aequilibrae/paths/bpr.pyx → aequilibrae/paths/cython/bpr.pyx
diff --git a/aequilibrae/paths/bpr2.pyx → aequilibrae/paths/cython/bpr2.pyx b/aequilibrae/paths/bpr2.pyx → aequilibrae/paths/cython/bpr2.pyx
diff --git a/aequilibrae/paths/conical.pyx → aequilibrae/paths/cython/conical.pyx b/aequilibrae/paths/conical.pyx → aequilibrae/paths/cython/conical.pyx
diff --git a/aequilibrae/paths/cython/coo_demand.pxd b/aequilibrae/paths/cython/coo_demand.pxd
@@ -0,0 +1,14 @@
+from libcpp.vector cimport vector
+from libcpp.memory cimport unique_ptr
+from libcpp.utility cimport pair
+
+cdef class GeneralisedCOODemand:
+    cdef:
+        public object df
+        readonly object f64_names
+        readonly object f32_names
+        readonly object shape
+        readonly object nodes_to_indices
+        vector[pair[long long, long long]] ods
+        vector[unique_ptr[vector[double]]] f64
+        vector[unique_ptr[vector[float]]] f32
diff --git a/aequilibrae/paths/cython/coo_demand.pyx b/aequilibrae/paths/cython/coo_demand.pyx
@@ -0,0 +1,109 @@
+import numpy as np
+import pandas as pd
+import logging
+from scipy.sparse import coo_matrix as sp_coo_matrix
+
+cdef class GeneralisedCOODemand:
+    def __init__(self, origin_col: str, destination_col: str, nodes_to_indices, shape=None):
+        """
+        A container for access to the float64 and float32 fields of a data frame.
+        """
+        self.df = pd.DataFrame(columns=[origin_col, destination_col]).set_index([origin_col, destination_col])
+        self.shape = shape
+        self.nodes_to_indices = nodes_to_indices
+
+    def add_df(self, dfs: Union[pd.DataFrame, List[pd.DataFrame]], shape=None, fill: float = 0.0):
+        """
+        Add a DataFrame to the existing ones.
+
+        Expects a DataFrame with a multi-index of (o, d).
+        """
+        if isinstance(dfs, pd.DataFrame):
+            dfs = (dfs,)
+
+        if shape is None and self.shape is None:
+            raise ValueError("a shape must be provided initially to prevent oddly sized sparse matrices")
+        if shape is not None and self.shape is None:
+            self.shape = shape
+        if shape is not None and self.shape is not None and shape != self.shape:
+            raise ValueError(f"provided shape ({shape}) differs from previous shape ({self.shape})")
+
+        new_dfs = [self.df]
+        for df in dfs:
+            if df.index.nlevels != 2:
+                raise ValueError("provided pd.DataFrame doesn't have a 2-level multi-index")
+            elif df.index.names != self.df.index.names:
+                raise ValueError(f"mismatched index names. Expect {self.df.index.names}, provided {df.index.names}")
+
+            shape = self.nodes_to_indices[df.index.to_frame(index=False)].max(axis=0)
+            if shape[0] >= self.shape[0] or shape[1] >= self.shape[1]:
+                raise ValueError(f"inferred max index ({(shape[0], shape[1])}) exceeds provided shape ({self.shape})")
+
+            new_dfs.append(df.select_dtypes(["float64", "float32"]))
+
+        self.df = pd.concat(new_dfs, axis=1).fillna(fill).sort_index()
+
+    def add_matrix(self, matrix: AequilibraeMatrix, shape=None, fill: float = 0.0):
+        """
+        Add an AequilibraE matrix to the existing demand in a sparse manner.
+        """
+        dfs = []
+        for i, name in enumerate(matrix.view_names):
+            assert name not in self.df.columns, f"Matrix name ({name}) already exists in the matrix cube"
+            m = matrix.matrix_view if len(matrix.view_names) == 1 else matrix.matrix_view[:, :, i]
+            if np.nansum(m) == 0:
+                continue
+
+            coo_ = sp_coo_matrix(m)
+            df = pd.DataFrame(
+                data={
+                    self.df.index.names[0]: matrix.index[coo_.row],
+                    self.df.index.names[1]: matrix.index[coo_.col],
+                    name: coo_.data,
+                },
+            ).set_index([self.df.index.names[0], self.df.index.names[1]])
+            dfs.append(df.dropna())
+
+        self.add_df(dfs, shape=shape, fill=fill)
+        logging.info(f"There {len(self.df):,} are OD pairs with non-zero flows")
+
+    def _initalise_c_data(self):
+        self.ods = self.df.index  # MultiIndex[int, int] -> vector[pair[long long, long long]]
+
+        self.f64.clear()
+        self.f32.clear()
+        self.f64_names = []
+        self.f32_names = []
+
+        cdef:
+            double[::1] f64_array
+            float[::1] f32_array
+            vector[double] *f64_vec
+            vector[float] *f32_vec
+
+        for col in self.df:
+            if self.df.dtypes[col] == "float64":
+                f64_array = self.df[col].to_numpy()
+                self.f64_names.append(col)
+
+                # The unique pointer will take ownership of this allocation
+                f64_vec = new vector[double]()
+                f64_vec.insert(f64_vec.begin(), &f64_array[0], &f64_array[0] + len(f64_array))
+                self.f64.emplace_back(f64_vec)  # From here f63_vec should not be accessed. It is owned by the unique pointer
+
+            elif self.df.dtypes[col] == "float32":
+                f32_array = self.df[col].to_numpy()
+                self.f32_names.append(col)
+
+                # The unique pointer will take ownership of this allocation
+                f32_vec = new vector[float]()
+                f32_vec.insert(f32_vec.begin(), &f32_array[0], &f32_array[0] + len(f32_array))
+                self.f32.emplace_back(f32_vec)  # From here f32_vec should not be accessed. It is owned by the unique pointer
+            else:
+                raise TypeError(f"non-floating point column ({col}) in self.df. Something has gone wrong")
+
+    def no_demand(GeneralisedCOODemand self) -> bool:
+        return len(self.df.columns) == 0
+
+    def is_empty(self) -> bool:
+        return self.df.index.empty
diff --git a/aequilibrae/paths/graph_building.pyx → aequilibrae/paths/cython/graph_building.pyx b/aequilibrae/paths/graph_building.pyx → aequilibrae/paths/cython/graph_building.pyx
diff --git a/aequilibrae/paths/hyperpath.pyx → aequilibrae/paths/cython/hyperpath.pyx b/aequilibrae/paths/hyperpath.pyx → aequilibrae/paths/cython/hyperpath.pyx
diff --git a/aequilibrae/paths/inrets.pyx → aequilibrae/paths/cython/inrets.pyx b/aequilibrae/paths/inrets.pyx → aequilibrae/paths/cython/inrets.pyx
diff --git a/aequilibrae/paths/parallel_numpy.pyx → aequilibrae/paths/cython/parallel_numpy.pyx b/aequilibrae/paths/parallel_numpy.pyx → aequilibrae/paths/cython/parallel_numpy.pyx
@@ -295,8 +295,8 @@ def assign_link_loads(actual_links, compressed_links, crosswalk, cores):
 @cython.wraparound(False)
 @cython.embedsignature(True)
 @cython.boundscheck(False)
-cpdef void assign_link_loads_cython(double[:, :] actual,
-                                    double[:, :] compressed,
+cpdef void assign_link_loads_cython(cython.floating[:, :] actual,
+                                    cython.floating[:, :] compressed,
                                     long long[:] crosswalk,
                                     int cores) noexcept:
     cdef long long i, j, k

diff --git a/aequilibrae/paths/parameters.pxi → aequilibrae/paths/cython/parameters.pxi b/aequilibrae/paths/parameters.pxi → aequilibrae/paths/cython/parameters.pxi
diff --git a/aequilibrae/paths/path_file_saving.pyx → ...ilibrae/paths/cython/path_file_saving.pyx b/aequilibrae/paths/path_file_saving.pyx → ...ilibrae/paths/cython/path_file_saving.pyx
diff --git a/aequilibrae/paths/pq_4ary_heap.pyx → aequilibrae/paths/cython/pq_4ary_heap.pyx b/aequilibrae/paths/pq_4ary_heap.pyx → aequilibrae/paths/cython/pq_4ary_heap.pyx
diff --git a/aequilibrae/paths/public_transport.pyx → ...ilibrae/paths/cython/public_transport.pyx b/aequilibrae/paths/public_transport.pyx → ...ilibrae/paths/cython/public_transport.pyx