Merge pull request #59 from Zinoex/fm/more_orthogonal_impls

More workspace types for OrthogonalIntervalProbabilities
Zinoex · Sep 11, 2024 · f1ab0bf · f1ab0bf
2 parents 24f0043 + 8e460d3
commit f1ab0bf
Show file tree

Hide file tree

Showing 9 changed files with 712 additions and 47 deletions.
diff --git a/ext/cuda/workspace.jl b/ext/cuda/workspace.jl
@@ -7,8 +7,10 @@ struct CuDenseWorkspace <: AbstractCuWorkspace
     max_actions::Int32
 end
 
-IntervalMDP.construct_workspace(::AbstractGPUMatrix, max_actions) =
-    CuDenseWorkspace(max_actions)
+IntervalMDP.construct_workspace(  # method chosen when the third type parameter MR is an AbstractGPUMatrix{R}
+    prob::IntervalProbabilities{R, VR, MR},  # used for dispatch only; not read in the body
+    max_actions = 1,
+) where {R, VR, MR <: AbstractGPUMatrix{R}} = CuDenseWorkspace(max_actions)
 
 ####################
 # Sparse workspace #
@@ -23,5 +25,8 @@ function CuSparseWorkspace(p::AbstractCuSparseMatrix, max_actions)
     return CuSparseWorkspace(max_nonzeros, max_actions)
 end
 
-IntervalMDP.construct_workspace(p::AbstractCuSparseMatrix, max_actions) =
-    CuSparseWorkspace(p, max_actions)
+# Sparse GPU workspace. The workspace must be sized from the sparsity pattern of the
+# transition matrix, so the gap matrix is handed to the CuSparseWorkspace(p, max_actions)
+# constructor, which derives max_nonzeros from it. Passing max_actions twice would
+# select the raw (max_nonzeros, max_actions) field constructor with a wrong size.
+IntervalMDP.construct_workspace(
+    prob::IntervalProbabilities{R, VR, MR},
+    max_actions = 1,
+) where {R, VR, MR <: AbstractCuSparseMatrix{R}} =
+    CuSparseWorkspace(IntervalMDP.gap(prob), max_actions)
diff --git a/src/bellman.jl b/src/bellman.jl
@@ -32,7 +32,7 @@ Vcur = bellman(Vprev, prob; upper_bound = false)
 
 """
 function bellman(V, prob; upper_bound = false)
-    Vres = similar(V, num_source(prob))
+    Vres = similar(V, source_shape(prob))  # allocate the result with the shape reported by source_shape(prob)
     return bellman!(Vres, V, prob; upper_bound = upper_bound)
 end
 
@@ -293,7 +293,7 @@ function gap_value(Vp, sum_lower)
     return res
 end
 
-# Dense
+# Dense orthogonal
 function bellman!(
     workspace::DenseOrthogonalWorkspace,
     strategy_cache::AbstractStrategyCache,
@@ -309,15 +309,101 @@ function bellman!(
 
     # For each higher-level state in the product space
     for I in CartesianIndices(product_nstates[2:end])
-        perm = @view workspace.permutation[axes(V, 1)]
-        sortperm!(perm, @view(V[:, I]); rev = upper_bound, scratch = workspace.scratch)
-
-        copyto!(@view(workspace.first_level_perm[:, I]), perm)
+        sort_dense_orthogonal(workspace, workspace.first_level_perm, V, I, upper_bound)
     end
 
     # For each source state
     @inbounds for (jₛ_cart, jₛ_linear) in
                   zip(CartesianIndices(axes(V)), LinearIndices(axes(V)))
+        bellman_dense_orthogonal!(
+            workspace,
+            workspace.first_level_perm,
+            strategy_cache,
+            Vres,
+            V,
+            prob,
+            stateptr,
+            product_nstates,
+            jₛ_cart,
+            jₛ_linear;
+            upper_bound = upper_bound,
+            maximize = maximize,
+        )
+    end
+
+    return Vres
+end
+
+function bellman!(  # threaded dense orthogonal update: fills Vres index by index and returns it
+    workspace::ThreadedDenseOrthogonalWorkspace,
+    strategy_cache::AbstractStrategyCache,
+    Vres,
+    V,
+    prob::OrthogonalIntervalProbabilities,
+    stateptr;
+    upper_bound = false,
+    maximize = true,
+)
+    # Since sorting for the first level is shared among all higher levels, we can precompute it
+    product_nstates = num_target(prob)
+
+    # For each higher-level state in the product space
+    @threadstid tid for I in CartesianIndices(product_nstates[2:end])
+        ws = workspace.thread_workspaces[tid]  # workspace entry selected by tid (presumably one per thread; confirm against @threadstid)
+        sort_dense_orthogonal(ws, workspace.first_level_perm, V, I, upper_bound)  # each I writes only its own slice first_level_perm[:, I]
+    end
+
+    # For each source state
+    I_linear = LinearIndices(axes(V))  # lookup table from Cartesian to linear index of V
+    @threadstid tid for jₛ_cart in CartesianIndices(axes(V))
+        # We can't use @threadstid over a zip, so we need to manually index
+        jₛ_linear = I_linear[jₛ_cart]
+
+        ws = workspace.thread_workspaces[tid]
+
+        bellman_dense_orthogonal!(
+            ws,
+            workspace.first_level_perm,  # permutation table filled by the loop above, passed separately from ws
+            strategy_cache,
+            Vres,
+            V,
+            prob,
+            stateptr,
+            product_nstates,
+            jₛ_cart,
+            jₛ_linear;
+            upper_bound = upper_bound,
+            maximize = maximize,
+        )
+    end
+
+    return Vres
+end
+
+function sort_dense_orthogonal(workspace, first_level_perm, V, I, upper_bound)  # compute the sort permutation of V[:, I] and store it in first_level_perm[:, I]
+    @inbounds begin
+        perm = @view workspace.permutation[axes(V, 1)]  # reuse workspace.permutation as the sortperm! output buffer
+        sortperm!(perm, @view(V[:, I]); rev = upper_bound, scratch = workspace.scratch)  # reversed order when upper_bound is true
+
+        copyto!(@view(first_level_perm[:, I]), perm)  # perm is reused on the next call, so the result is copied out
+    end
+end
+
+function bellman_dense_orthogonal!(
+    workspace,
+    first_level_perm,
+    strategy_cache::AbstractStrategyCache,
+    Vres,
+    V,
+    prob::OrthogonalIntervalProbabilities,
+    stateptr,
+    product_nstates,
+    jₛ_cart,
+    jₛ_linear;
+    upper_bound = false,
+    maximize = true,
+)
+    @inbounds begin
         s₁, s₂ = stateptr[jₛ_linear], stateptr[jₛ_linear + 1]
         actions = @view workspace.actions[1:(s₂ - s₁)]
         for (i, jₐ) in enumerate(s₁:(s₂ - 1))
@@ -328,7 +414,8 @@ function bellman!(
 
                 # For the first dimension, we need to copy the values from V
                 v = orthogonal_inner_sorted_bellman!(
-                    @view(workspace.first_level_perm[:, I]),
+                    # Use shared first level permutation across threads
+                    @view(first_level_perm[:, I]),
                     @view(V[:, I]),
                     prob[1],
                     jₐ,
@@ -359,12 +446,10 @@ function bellman!(
 
         Vres[jₛ_cart] = extract_strategy!(strategy_cache, actions, V, jₛ_cart, s₁, maximize)
     end
-
-    return Vres
 end
 
 Base.@propagate_inbounds function orthogonal_inner_bellman!(
-    workspace::DenseOrthogonalWorkspace,
+    workspace::Union{DenseOrthogonalWorkspace, ThreadDenseOrthogonalWorkspace},
     V,
     prob,
     jₐ,
@@ -390,3 +475,168 @@ Base.@propagate_inbounds function orthogonal_inner_sorted_bellman!(
 
     return dot(V, lowerⱼ) + gap_value(V, gapⱼ, used, perm)
 end
+
+# Sparse orthogonal
+function bellman!(  # sequential sparse orthogonal update: one bellman_sparse_orthogonal! call per index of V; returns Vres
+    workspace::SparseOrthogonalWorkspace,
+    strategy_cache::AbstractStrategyCache,
+    Vres,
+    V,
+    prob::OrthogonalIntervalProbabilities,
+    stateptr;
+    upper_bound = false,
+    maximize = true,
+)
+    # For each source state
+    @inbounds for (jₛ_cart, jₛ_linear) in
+                  zip(CartesianIndices(axes(V)), LinearIndices(axes(V)))  # Cartesian and linear index of the same element, in lockstep
+        bellman_sparse_orthogonal!(
+            workspace,
+            strategy_cache,
+            Vres,
+            V,
+            prob,
+            stateptr,
+            jₛ_cart,
+            jₛ_linear;
+            upper_bound = upper_bound,
+            maximize = maximize,
+        )
+    end
+
+    return Vres
+end
+function bellman!(  # threaded sparse orthogonal update: same per-state kernel as the sequential method; returns Vres
+    workspace::ThreadedSparseOrthogonalWorkspace,
+    strategy_cache::AbstractStrategyCache,
+    Vres,
+    V,
+    prob::OrthogonalIntervalProbabilities,
+    stateptr;
+    upper_bound = false,
+    maximize = true,
+)
+    # For each source state
+    I_linear = LinearIndices(axes(V))  # lookup table from Cartesian to linear index of V
+    @threadstid tid for jₛ_cart in CartesianIndices(axes(V))
+        # We can't use @threadstid over a zip, so we need to manually index
+        jₛ_linear = I_linear[jₛ_cart]
+
+        ws = workspace.thread_workspaces[tid]  # workspace entry selected by tid (presumably one per thread; confirm against @threadstid)
+
+        bellman_sparse_orthogonal!(
+            ws,
+            strategy_cache,
+            Vres,
+            V,
+            prob,
+            stateptr,
+            jₛ_cart,
+            jₛ_linear;
+            upper_bound = upper_bound,
+            maximize = maximize,
+        )
+    end
+
+    return Vres
+end
+
+function bellman_sparse_orthogonal!(  # compute Vres[jₛ_cart] for one source state from the action columns stateptr[jₛ_linear]:(stateptr[jₛ_linear + 1] - 1)
+    workspace,
+    strategy_cache::AbstractStrategyCache,
+    Vres,
+    V,
+    prob::OrthogonalIntervalProbabilities,
+    stateptr,
+    jₛ_cart,
+    jₛ_linear;
+    upper_bound = false,
+    maximize = true,
+)
+    @inbounds begin
+        s₁, s₂ = stateptr[jₛ_linear], stateptr[jₛ_linear + 1]
+        actions = @view workspace.actions[1:(s₂ - s₁)]  # scratch slice with one slot per action column of this state
+        for (i, jₐ) in enumerate(s₁:(s₂ - 1))
+            nzinds_first = SparseArrays.nonzeroinds(@view(gap(prob[1])[:, jₐ]))  # stored row indices of column jₐ on the first axis
+            nzinds_per_prob =
+                [SparseArrays.nonzeroinds(@view(gap(p)[:, jₐ])) for p in prob[2:end]]  # same, for axes 2:end
+
+            lower_nzvals_per_prob = [nonzeros(@view(lower(p)[:, jₐ])) for p in prob]  # NOTE(review): paired below with values picked by gap's indices; assumes lower and gap share a sparsity pattern, confirm
+            gap_nzvals_per_prob = [nonzeros(@view(gap(p)[:, jₐ])) for p in prob]
+            sum_lower_per_prob = [sum_lower(p)[jₐ] for p in prob]
+
+            nnz_per_prob = Tuple(nnz(@view(gap(p)[:, jₐ])) for p in prob)  # number of stored gap entries per axis for column jₐ
+            Vₑ = [
+                @view(cache[1:nnz]) for
+                (cache, nnz) in zip(workspace.expectation_cache, nnz_per_prob[2:end])  # caches for axes 2:end, truncated to their stored-entry counts
+            ]
+
+            # For each higher-level state in the product space
+            for I in CartesianIndices(nnz_per_prob[2:end])  # I enumerates positions within the stored-entry lists, not state indices
+                Isparse = CartesianIndex(Tuple(map(enumerate(Tuple(I))) do (d, i)
+                    nzinds_per_prob[d][i]  # translate position i on axis d + 1 into the actual index along that axis
+                end))
+
+                # For the first dimension, we need to copy the values from V
+                v = orthogonal_sparse_inner_bellman!(
+                    workspace,
+                    @view(V[nzinds_first, Isparse]),
+                    lower_nzvals_per_prob[1],
+                    gap_nzvals_per_prob[1],
+                    sum_lower_per_prob[1],
+                    upper_bound,
+                )
+                Vₑ[1][I[1]] = v
+
+                # For the remaining dimensions, once the previous level's cache is full (I[d - 1] is at its last stored entry), compute its expectation and store it in the next level
+                for d in 2:(ndims(prob) - 1)
+                    if I[d - 1] == nnz_per_prob[d]
+                        v = orthogonal_sparse_inner_bellman!(
+                            workspace,
+                            Vₑ[d - 1],
+                            lower_nzvals_per_prob[d],
+                            gap_nzvals_per_prob[d],
+                            sum_lower_per_prob[d],
+                            upper_bound,
+                        )
+                        Vₑ[d][I[d]] = v
+                    else
+                        break  # level d - 1 is not complete yet, so no higher level can be reduced either
+                    end
+                end
+            end
+
+            # Last dimension
+            v = orthogonal_sparse_inner_bellman!(
+                workspace,
+                Vₑ[end],
+                lower_nzvals_per_prob[end],
+                gap_nzvals_per_prob[end],
+                sum_lower_per_prob[end],
+                upper_bound,
+            )
+            actions[i] = v  # value of the i-th action of this state
+        end
+
+        Vres[jₛ_cart] = extract_strategy!(strategy_cache, actions, V, jₛ_cart, s₁, maximize)  # reduce the per-action values to one value (presumably max/min per `maximize`; confirm in extract_strategy!)
+    end
+end
+
+Base.@propagate_inbounds function orthogonal_sparse_inner_bellman!(  # returns dot(V, lower) + gap_value over the sorted (value, gap) pairs
+    workspace::SparseOrthogonalWorkspace,
+    V,
+    lower,
+    gap,
+    sum_lower,
+    upper_bound::Bool,
+)
+    Vp_workspace = @view workspace.values_gaps[1:length(gap)]  # reuse the preallocated pair buffer, one slot per gap entry
+    for (i, (v, p)) in enumerate(zip(V, gap))
+        Vp_workspace[i] = (v, p)  # pair each value with its gap so they stay together when sorted
+    end
+
+    # rev=true for upper bound
+    sort!(Vp_workspace; rev = upper_bound, scratch = workspace.scratch)  # tuples compare lexicographically, i.e. by value first
+
+    return dot(V, lower) + gap_value(Vp_workspace, sum_lower)
+end
diff --git a/src/interval_probabilities.jl b/src/interval_probabilities.jl
@@ -167,6 +167,7 @@ sum_lower(p::IntervalProbabilities) = p.sum_lower
 Return the number of source states or source/action pairs.
 """
 num_source(p::IntervalProbabilities) = size(gap(p), 2)
+source_shape(p::IntervalProbabilities) = (num_source(p),)  # 1-tuple wrapping num_source(p)
 
 """
     axes_source(p::IntervalProbabilities)
@@ -220,7 +221,7 @@ target states along each axis.
 
 ### Fields
 - `probs::NTuple{N, P}`: A tuple of `IntervalProbabilities` transition probabilities along each axis.
-- `dims::NTuple{N, Int32}`: The dimensions of the orthogonal probabilities.
+- `source_dims::NTuple{N, Int32}`: The dimensions of the orthogonal probabilities for the source axis. This is flattened to a single dimension for indexing.
 
 ### Examples
 # TODO: Update example
@@ -229,7 +230,7 @@ target states along each axis.
 struct OrthogonalIntervalProbabilities{N, P <: IntervalProbabilities} <:
        AbstractIntervalProbabilities
     probs::NTuple{N, P}  # one IntervalProbabilities per axis; accessed through p[i]
-    dims::NTuple{N, Int32}
+    source_dims::NTuple{N, Int32}  # returned unchanged by source_shape(p)
 end
 
 """
@@ -272,6 +273,7 @@ sum_lower(p::OrthogonalIntervalProbabilities, i) = p.probs[i].sum_lower
 Return the number of source states or source/action pairs.
 """
 num_source(p::OrthogonalIntervalProbabilities) = num_source(first(p.probs))
+source_shape(p::OrthogonalIntervalProbabilities) = p.source_dims  # N-tuple, unlike the 1-tuple of the IntervalProbabilities method
 
 """
     axes_source(p::OrthogonalIntervalProbabilities)
@@ -288,3 +290,5 @@ Base.getindex(p::OrthogonalIntervalProbabilities, i) = p.probs[i]
 Base.lastindex(p::OrthogonalIntervalProbabilities) = ndims(p)
 Base.firstindex(p::OrthogonalIntervalProbabilities) = 1
 Base.length(p::OrthogonalIntervalProbabilities) = ndims(p)
+Base.iterate(p::OrthogonalIntervalProbabilities) = (p[1], 2)  # start of iteration: yield the first axis, next state is 2
+Base.iterate(p::OrthogonalIntervalProbabilities, i) = i > ndims(p) ? nothing : (p[i], i + 1)  # yield axis i, or stop once i exceeds ndims(p)
diff --git a/src/strategy.jl b/src/strategy.jl
@@ -45,7 +45,10 @@ construct_strategy_cache(mp::IntervalMarkovProcess, config) =
 # Strategy cache for not storing policies - useful for dispatching
 struct NoStrategyCache <: AbstractStrategyCache end
 
-function construct_strategy_cache(::IntervalProbabilities, ::NoStrategyConfig)
+function construct_strategy_cache(
+    ::Union{IntervalProbabilities, OrthogonalIntervalProbabilities},  # both probability types are accepted; the argument value is not used
+    ::NoStrategyConfig,
+)
     return NoStrategyCache()  # NoStrategyCache is a fieldless struct, so the arguments contribute nothing to it
 end