Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 5 additions & 13 deletions src/bellman.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
bellman(V, model; upper_bound = false, maximize = true)
expectation(V, model; upper_bound = false, maximize = true)

Compute robust Bellman update with the value function `V` and the model `model`, e.g. [`IntervalMarkovDecisionProcess`](@ref),
that upper or lower bounds the expectation of the value function `V`.
Expand Down Expand Up @@ -56,7 +56,7 @@ istates = [Int32(1)]
model = IntervalMarkovDecisionProcess(transition_probs, istates)

Vprev = [1.0, 2.0, 3.0]
Vcur = IntervalMDP.bellman(Vprev, model; upper_bound = false)
Vcur = IntervalMDP.expectation(Vprev, model; upper_bound = false)

# output

Expand All @@ -71,15 +71,15 @@ Vcur = IntervalMDP.bellman(Vprev, model; upper_bound = false)
For a hot-loop, it is more efficient to use `expectation!` and pass in pre-allocated objects.

"""
function bellman(
function expectation(
V,
model,
alg::BellmanAlgorithm = default_bellman_algorithm(model);
upper_bound = false,
maximize = true,
prop = nothing,
)
Vres = similar(V, source_shape(model))
Vres = Array{eltype(V)}(undef, (action_values(model)..., state_values(model)...))

return expectation!(
Vres,
Expand Down Expand Up @@ -451,17 +451,9 @@ Base.@propagate_inbounds function state_expectation!(
for jₐ in available(model, jₛ)
ambiguity_set = marginal[jₐ, jₛ]
budget = workspace.budget[sub2ind(marginal, jₐ, jₛ)]
workspace.actions[jₐ] =
Vres[jₐ, jₛ] =
state_action_expectation(workspace, V, ambiguity_set, budget, upper_bound)
end

Vres[jₛ] = extract_strategy!(
strategy_cache,
workspace.actions,
available(model, jₛ),
jₛ,
maximize,
)
end


Expand Down
30 changes: 29 additions & 1 deletion src/robust_value_iteration.jl
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,35 @@ function _value_iteration!(problem::AbstractIntervalMDPProblem, alg; callback =
return value_function.current, k, value_function.previous, strategy_cache
end

function bellman_update!(workspace, strategy_cache, value_function, k, mp, spec)
"""
    bellman_update!(workspace, strategy_cache, value_function, k, mp, spec)

Perform one robust Bellman update for a `StateValueFunction`: compute the
state-action values Q(s, a), extract the (max/min) strategy to obtain
V'(s), and post-process according to the specification.
"""
function bellman_update!(workspace, strategy_cache, value_function::StateValueFunction, k, mp, spec)

    # 1. compute expectation for Q(s, a)
    expectation!(
        workspace,
        select_strategy_cache(strategy_cache, k),
        value_function.intermediate_state_action_value,
        value_function.previous,
        select_model(mp, k); # For time-varying available and labelling functions
        upper_bound = isoptimistic(spec),
        maximize = ismaximize(spec),
        prop = system_property(spec),
    )

    # 2. extract strategy and compute V'(s) = max_a Q(s, a)
    strategy!(
        select_strategy_cache(strategy_cache, k),
        value_function.current,
        value_function.intermediate_state_action_value,
        select_model(mp, k),
        ismaximize(spec),
    )

    # 3. post process to compute V(s) = g(s, V'(s)) where the definition of g depends on the objective
    step_postprocess_value_function!(value_function, spec)
    step_postprocess_strategy_cache!(strategy_cache)
end

function bellman_update!(workspace, strategy_cache::NonOptimizingStrategyCache, value_function::StateValueFunction, k, mp, spec)
expectation!(
workspace,
select_strategy_cache(strategy_cache, k),
Expand Down
23 changes: 23 additions & 0 deletions src/strategy_cache.jl
Original file line number Diff line number Diff line change
Expand Up @@ -159,3 +159,26 @@ function _extract_strategy!(cur_strategy, values, available_actions, neutral, j
@inbounds cur_strategy[jₛ] = opt_index
return opt_val
end


"""
    strategy!(strategy_cache, Vres, Q, model, maximize)

Extract a strategy from the state-action values `Q` (laid out with action
dimensions first, then state dimensions) and store the resulting state
values in `Vres`, i.e. `Vres[s] = opt_a Q[a, s]` where `opt` is presumably
max or min depending on `maximize` (delegated to `extract_strategy!`).
"""
function strategy!(
    strategy_cache::OptimizingStrategyCache,
    Vres::AbstractArray{R},
    Q::AbstractArray{R},
    model,
    maximize,
) where {R <: Real}

    # TODO: can be threaded? Left single-threaded for now.
    @inbounds for jₛ in CartesianIndices(source_shape(model))
        # Use a view to avoid allocating a copy of the action values for
        # every state (many small allocations in the hot loop).
        Vres[jₛ] = extract_strategy!(
            strategy_cache,
            @view(Q[:, jₛ]),
            available(model, jₛ),
            jₛ,
            maximize,
        )
    end
    return nothing
end
6 changes: 3 additions & 3 deletions src/value.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ function StateValueFunction(problem::AbstractIntervalMDPProblem)
previous .= zero(valuetype(mp))
current = copy(previous)

dim = Tuple(Iterators.flatten(zip(action_values(mp), state_values(mp))))
# interleaved concat gives shape: (a1, a2) , (s1, s2) => (a1, s1, a2, s2)
dim = (action_values(mp)..., state_values(mp)...)
# concat gives shape: (a1, a2) , (s1, s2) => (a1, a2, s1, s2)
# (a, s) to access a more frequently due to column major
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# (a, s) to access s more frequently due to column major
# (a, s) to access a more frequently due to column major

# TODO: works for IMDP, need to check for fIMDP
intermediate_state_action_value = arrayfactory(mp, valuetype(mp), dim)
Expand Down Expand Up @@ -45,7 +45,7 @@ end

function StateActionValueFunction(problem::AbstractIntervalMDPProblem)
mp = system(problem)
dim = Tuple(Iterators.flatten(zip(action_values(mp), state_values(mp))))
dim = (action_values(mp)..., state_values(mp)...)
# TODO: works for IMDP, need to check for fIMDP
previous = arrayfactory(mp, valuetype(mp), dim)
previous .= zero(valuetype(mp))
Expand Down
6 changes: 2 additions & 4 deletions src/workspace.jl
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@ struct DenseIntervalOMaxWorkspace{T <: Real}
budget::Vector{T}
scratch::Vector{Int32}
permutation::Vector{Int32}
actions::Vector{T}
end

function DenseIntervalOMaxWorkspace(
Expand All @@ -63,8 +62,7 @@ function DenseIntervalOMaxWorkspace(
budget = 1 .- vec(sum(ambiguity_set.lower; dims = 1))
scratch = Vector{Int32}(undef, num_target(ambiguity_set))
perm = Vector{Int32}(undef, num_target(ambiguity_set))
actions = Vector{R}(undef, nactions)
return DenseIntervalOMaxWorkspace(budget, scratch, perm, actions)
return DenseIntervalOMaxWorkspace(budget, scratch, perm)
end

permutation(ws::DenseIntervalOMaxWorkspace) = ws.permutation
Expand All @@ -83,7 +81,7 @@ function ThreadedDenseIntervalOMaxWorkspace(
perm = Vector{Int32}(undef, num_target(ambiguity_set))

workspaces = [
DenseIntervalOMaxWorkspace(budget, scratch, perm, Vector{R}(undef, nactions))
DenseIntervalOMaxWorkspace(budget, scratch, perm)
for _ in 1:Threads.nthreads()
]
return ThreadedDenseIntervalOMaxWorkspace(workspaces)
Expand Down
16 changes: 8 additions & 8 deletions test/base/bellman.jl
Original file line number Diff line number Diff line change
Expand Up @@ -13,41 +13,41 @@ using IntervalMDP
@testset "maximization" begin
ws = IntervalMDP.construct_workspace(prob)
strategy_cache = IntervalMDP.construct_strategy_cache(prob)
Vres = zeros(N, 2)
Vres = zeros(N, 1, 2)
IntervalMDP._expectation_helper!(ws, strategy_cache, Vres, V, prob; upper_bound = true)
@test Vres ≈ N[27 // 10, 17 // 10] # [0.3 * 2 + 0.7 * 3, 0.5 * 1 + 0.3 * 2 + 0.2 * 3]
@test Vres ≈ N[27 // 10, 17 // 10]' # [0.3 * 2 + 0.7 * 3, 0.5 * 1 + 0.3 * 2 + 0.2 * 3]

ws = IntervalMDP.DenseIntervalOMaxWorkspace(prob, 1)
strategy_cache = IntervalMDP.construct_strategy_cache(prob)
Vres = similar(Vres)
IntervalMDP._expectation_helper!(ws, strategy_cache, Vres, V, prob; upper_bound = true)
@test Vres ≈ N[27 // 10, 17 // 10]
@test Vres ≈ N[27 // 10, 17 // 10]'

ws = IntervalMDP.ThreadedDenseIntervalOMaxWorkspace(prob, 1)
strategy_cache = IntervalMDP.construct_strategy_cache(prob)
Vres = similar(Vres)
IntervalMDP._expectation_helper!(ws, strategy_cache, Vres, V, prob; upper_bound = true)
@test Vres ≈ N[27 // 10, 17 // 10]
@test Vres ≈ N[27 // 10, 17 // 10]'
end

#### Minimization
@testset "minimization" begin
ws = IntervalMDP.construct_workspace(prob)
strategy_cache = IntervalMDP.construct_strategy_cache(prob)
Vres = zeros(N, 2)
Vres = zeros(N, 1, 2)
IntervalMDP._expectation_helper!(ws, strategy_cache, Vres, V, prob; upper_bound = false)
@test Vres ≈ N[17 // 10, 15 // 10] # [0.5 * 1 + 0.3 * 2 + 0.2 * 3, 0.6 * 1 + 0.3 * 2 + 0.1 * 3]
@test Vres ≈ N[17 // 10, 15 // 10]' # [0.5 * 1 + 0.3 * 2 + 0.2 * 3, 0.6 * 1 + 0.3 * 2 + 0.1 * 3]

ws = IntervalMDP.DenseIntervalOMaxWorkspace(prob, 1)
strategy_cache = IntervalMDP.construct_strategy_cache(prob)
Vres = similar(Vres)
IntervalMDP._expectation_helper!(ws, strategy_cache, Vres, V, prob; upper_bound = false)
@test Vres ≈ N[17 // 10, 15 // 10]
@test Vres ≈ N[17 // 10, 15 // 10]'

ws = IntervalMDP.ThreadedDenseIntervalOMaxWorkspace(prob, 1)
strategy_cache = IntervalMDP.construct_strategy_cache(prob)
Vres = similar(Vres)
IntervalMDP._expectation_helper!(ws, strategy_cache, Vres, V, prob; upper_bound = false)
@test Vres ≈ N[17 // 10, 15 // 10]
@test Vres ≈ N[17 // 10, 15 // 10]'
end
end
4 changes: 2 additions & 2 deletions test/base/factored.jl
Original file line number Diff line number Diff line change
Expand Up @@ -424,7 +424,7 @@ end
end

@testset "maximization" begin
Vexpected = IntervalMDP.bellman(V, imc; upper_bound = true) # Using O-maximization, should be equivalent
Vexpected = IntervalMDP.expectation(V, imc; upper_bound = true) # Using O-maximization, should be equivalent

ws = IntervalMDP.construct_workspace(imc, LPMcCormickRelaxation())
strategy_cache = IntervalMDP.construct_strategy_cache(imc)
Expand Down Expand Up @@ -468,7 +468,7 @@ end
end

@testset "minimization" begin
Vexpected = IntervalMDP.bellman(V, imc; upper_bound = false) # Using O-maximization, should be equivalent
Vexpected = IntervalMDP.expectation(V, imc; upper_bound = false) # Using O-maximization, should be equivalent

ws = IntervalMDP.construct_workspace(imc, LPMcCormickRelaxation())
strategy_cache = IntervalMDP.construct_strategy_cache(imc)
Expand Down
8 changes: 4 additions & 4 deletions test/base/imdp.jl
Original file line number Diff line number Diff line change
Expand Up @@ -49,29 +49,29 @@ using IntervalMDP
@testset "bellman" begin
V = N[1, 2, 3]

Vres = IntervalMDP.bellman(V, mdp; upper_bound = false, maximize = true)
Vres = IntervalMDP.expectation(V, mdp; upper_bound = false, maximize = true)
@test Vres ≈ N[
(1 // 2) * 1 + (3 // 10) * 2 + (1 // 5) * 3,
(3 // 10) * 1 + (3 // 10) * 2 + (2 // 5) * 3,
1 * 3,
]

Vres = similar(Vres)
Vres = Array{N}(undef, (3, 3))
IntervalMDP.expectation!(Vres, V, mdp; upper_bound = false, maximize = true)
@test Vres ≈ N[
(1 // 2) * 1 + (3 // 10) * 2 + (1 // 5) * 3,
(3 // 10) * 1 + (3 // 10) * 2 + (2 // 5) * 3,
1 * 3,
]

Vres = IntervalMDP.bellman(V, mdp; upper_bound = true, maximize = false)
Vres = IntervalMDP.expectation(V, mdp; upper_bound = true, maximize = false)
@test Vres ≈ N[
(1 // 2) * 1 + (3 // 10) * 2 + (1 // 5) * 3,
(1 // 5) * 1 + (2 // 5) * 2 + (2 // 5) * 3,
1 * 3,
]

Vres = similar(Vres)
Vres = Array{N}(undef, (3, 3))
IntervalMDP.expectation!(Vres, V, mdp; upper_bound = true, maximize = false)
@test Vres ≈ N[
(1 // 2) * 1 + (3 // 10) * 2 + (1 // 5) * 3,
Expand Down
8 changes: 4 additions & 4 deletions test/base/product.jl
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ end
0 5
]

Vres = IntervalMDP.bellman(V, prod_proc; upper_bound = false)
Vres = IntervalMDP.expectation(V, prod_proc; upper_bound = false)

@test Vres ≈ N[
30//10 24//10
Expand Down Expand Up @@ -220,7 +220,7 @@ end
]

# No Strategy
Vres = IntervalMDP.bellman(V, prod_proc; upper_bound = false)
Vres = IntervalMDP.expectation(V, prod_proc; upper_bound = false)
@test Vtar ≈ Vres atol=eps

# Non Stationary Strategy (Init iteration)
Expand Down Expand Up @@ -421,7 +421,7 @@ end
45//10 5
]

Vres = IntervalMDP.bellman(V, prod_proc; upper_bound = false)
Vres = IntervalMDP.expectation(V, prod_proc; upper_bound = false)
@test Vres ≈ Vtar
end
end
Expand Down Expand Up @@ -504,7 +504,7 @@ end
]

# No Strategy
Vres = IntervalMDP.bellman(V, prod_proc; upper_bound = false)
Vres = IntervalMDP.expectation(V, prod_proc; upper_bound = false)
@test Vtar ≈ Vres atol=eps

# Non Stationary Strategy
Expand Down
2 changes: 1 addition & 1 deletion test/cuda/dense/imdp.jl
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ using IntervalMDP, CUDA

@testset "bellman" begin
V = IntervalMDP.cu(N[1, 2, 3])
Vres = IntervalMDP.bellman(V, mdp; upper_bound = false, maximize = true)
Vres = IntervalMDP.expectation(V, mdp; upper_bound = false, maximize = true)
Vres = IntervalMDP.cpu(Vres) # Convert to CPU for testing
@test Vres ≈ N[
(1 // 2) * 1 + (3 // 10) * 2 + (1 // 5) * 3,
Expand Down
2 changes: 1 addition & 1 deletion test/cuda/sparse/imdp.jl
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ using IntervalMDP, CUDA, SparseArrays

@testset "bellman" begin
V = IntervalMDP.cu(N[1, 2, 3])
Vres = IntervalMDP.bellman(V, mdp; upper_bound = false, maximize = true)
Vres = IntervalMDP.expectation(V, mdp; upper_bound = false, maximize = true)
Vres = IntervalMDP.cpu(Vres) # Convert to CPU for testing
@test Vres ≈ N[
(1 // 2) * 1 + (3 // 10) * 2 + (1 // 5) * 3,
Expand Down
4 changes: 2 additions & 2 deletions test/sparse/factored.jl
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ end
end

@testset "maximization" begin
Vexpected = IntervalMDP.bellman(V, imc; upper_bound = true) # Using O-maximization, should be equivalent
Vexpected = IntervalMDP.expectation(V, imc; upper_bound = true) # Using O-maximization, should be equivalent

ws = IntervalMDP.construct_workspace(imc, LPMcCormickRelaxation())
strategy_cache = IntervalMDP.construct_strategy_cache(imc)
Expand Down Expand Up @@ -134,7 +134,7 @@ end
end

@testset "minimization" begin
Vexpected = IntervalMDP.bellman(V, imc; upper_bound = false) # Using O-maximization, should be equivalent
Vexpected = IntervalMDP.expectation(V, imc; upper_bound = false) # Using O-maximization, should be equivalent

ws = IntervalMDP.construct_workspace(imc, LPMcCormickRelaxation())
strategy_cache = IntervalMDP.construct_strategy_cache(imc)
Expand Down
2 changes: 1 addition & 1 deletion test/sparse/imdp.jl
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ using IntervalMDP, SparseArrays

@testset "bellman" begin
V = N[1, 2, 3]
Vres = IntervalMDP.bellman(V, mdp; upper_bound = false, maximize = true)
Vres = IntervalMDP.expectation(V, mdp; upper_bound = false, maximize = true)
@test Vres ≈ N[
(1 // 2) * 1 + (3 // 10) * 2 + (1 // 5) * 3,
(3 // 10) * 1 + (3 // 10) * 2 + (2 // 5) * 3,
Expand Down
Loading