From 511a901b47963606894ad308803a7711c01c47f9 Mon Sep 17 00:00:00 2001
From: JeffersonYeh <formosat1416@icloud.com>
Date: Thu, 12 Mar 2026 18:56:30 +0100
Subject: [PATCH 1/4] Split expectation and strategy computations for Dense
 IMDP

---
 src/bellman.jl                | 12 ++----------
 src/robust_value_iteration.jl | 30 +++++++++++++++++++++++++++++-
 src/strategy_cache.jl         | 23 +++++++++++++++++++++++
 src/value.jl                  |  6 +++---
 src/workspace.jl              |  6 ++----
 test/base/bellman.jl          | 16 ++++++++--------
 test/base/imdp.jl             |  4 ++--
 7 files changed, 69 insertions(+), 28 deletions(-)

diff --git a/src/bellman.jl b/src/bellman.jl
index 2141673..45822e4 100644
--- a/src/bellman.jl
+++ b/src/bellman.jl
@@ -79,7 +79,7 @@ function bellman(
     maximize = true,
     prop = nothing,
 )
-    Vres = similar(V, source_shape(model))
+    Vres = Array{eltype(V)}(undef, (action_values(model)..., state_values(model)...))
 
     return expectation!(
         Vres,
@@ -451,17 +451,9 @@ Base.@propagate_inbounds function state_expectation!(
     for jₐ in available(model, jₛ)
         ambiguity_set = marginal[jₐ, jₛ]
         budget = workspace.budget[sub2ind(marginal, jₐ, jₛ)]
-        workspace.actions[jₐ] =
+        Vres[jₐ, jₛ] =
             state_action_expectation(workspace, V, ambiguity_set, budget, upper_bound)
     end
-
-    Vres[jₛ] = extract_strategy!(
-        strategy_cache,
-        workspace.actions,
-        available(model, jₛ),
-        jₛ,
-        maximize,
-    )
 end
 
  
diff --git a/src/robust_value_iteration.jl b/src/robust_value_iteration.jl
index 1207bd5..252099c 100644
--- a/src/robust_value_iteration.jl
+++ b/src/robust_value_iteration.jl
@@ -205,7 +205,35 @@ function _value_iteration!(problem::AbstractIntervalMDPProblem, alg; callback =
     return value_function.current, k, value_function.previous, strategy_cache
 end
 
-function bellman_update!(workspace, strategy_cache, value_function, k, mp, spec)
+function bellman_update!(workspace, strategy_cache, value_function::StateValueFunction, k, mp, spec)
+
+    # 1. compute expectation for Q(s, a)
+    expectation!(
+        workspace,
+        select_strategy_cache(strategy_cache, k),
+        value_function.intermediate_state_action_value,
+        value_function.previous,
+        select_model(mp, k);  # For time-varying available and labelling functions
+        upper_bound = isoptimistic(spec),
+        maximize = ismaximize(spec),
+        prop = system_property(spec),
+    )
+
+    # 2. extract strategy and compute V(s) = max_a Q(s, a)
+    strategy!(
+        select_strategy_cache(strategy_cache, k),
+        value_function.current,
+        value_function.intermediate_state_action_value,
+        select_model(mp, k),
+        ismaximize(spec),
+    )
+
+    # 3. post process
+    step_postprocess_value_function!(value_function, spec)
+    step_postprocess_strategy_cache!(strategy_cache)
+end
+
+function bellman_update!(workspace, strategy_cache::NonOptimizingStrategyCache, value_function::StateValueFunction, k, mp, spec)
     expectation!(
         workspace,
         select_strategy_cache(strategy_cache, k),
diff --git a/src/strategy_cache.jl b/src/strategy_cache.jl
index 0da47b1..209dc40 100644
--- a/src/strategy_cache.jl
+++ b/src/strategy_cache.jl
@@ -159,3 +159,26 @@ function _extract_strategy!(cur_strategy, values, available_actions, neutral, j
     @inbounds cur_strategy[jₛ] = opt_index
     return opt_val
 end
+
+
+function strategy!(
+    strategy_cache::OptimizingStrategyCache,
+    Vres::AbstractArray{R},
+    Q::AbstractArray{R},
+    model,
+    maximize,
+) where {R <: Real}
+
+    #TODO: can be threaded?
+    for jₛ in CartesianIndices(source_shape(model))
+
+        Vres[jₛ] = extract_strategy!(
+            strategy_cache,
+            Q[:, jₛ],
+            available(model, jₛ),
+            jₛ,
+            maximize,
+        )
+
+    end
+end
\ No newline at end of file
diff --git a/src/value.jl b/src/value.jl
index cd6c18e..d0613b6 100644
--- a/src/value.jl
+++ b/src/value.jl
@@ -12,8 +12,8 @@ function StateValueFunction(problem::AbstractIntervalMDPProblem)
     previous .= zero(valuetype(mp))
     current = copy(previous)
 
-    dim = Tuple(Iterators.flatten(zip(action_values(mp), state_values(mp))))
-    # interleaved concat gives shape: (a1, a2) , (s1, s2) => (a1, s1, a2, s2)
+    dim = (action_values(mp)..., state_values(mp)...)
+    # concat gives shape: (a1, a2) , (s1, s2) => (a1, a2, s1, s2)
     # (a, s) to access s more frequently due to column major
     # TODO: works for IMDP, need to check for fIMDP
     intermediate_state_action_value = arrayfactory(mp, valuetype(mp), dim)
@@ -45,7 +45,7 @@ end
 
 function StateActionValueFunction(problem::AbstractIntervalMDPProblem)
     mp = system(problem)
-    dim = Tuple(Iterators.flatten(zip(action_values(mp), state_values(mp))))
+    dim = (action_values(mp)..., state_values(mp)...)
     # TODO: works for IMDP, need to check for fIMDP
     previous = arrayfactory(mp, valuetype(mp), dim)
     previous .= zero(valuetype(mp))
diff --git a/src/workspace.jl b/src/workspace.jl
index c36ca4c..56a7b4b 100644
--- a/src/workspace.jl
+++ b/src/workspace.jl
@@ -53,7 +53,6 @@ struct DenseIntervalOMaxWorkspace{T <: Real}
     budget::Vector{T}
     scratch::Vector{Int32}
     permutation::Vector{Int32}
-    actions::Vector{T}
 end
 
 function DenseIntervalOMaxWorkspace(
@@ -63,8 +62,7 @@ function DenseIntervalOMaxWorkspace(
     budget = 1 .- vec(sum(ambiguity_set.lower; dims = 1))
     scratch = Vector{Int32}(undef, num_target(ambiguity_set))
     perm = Vector{Int32}(undef, num_target(ambiguity_set))
-    actions = Vector{R}(undef, nactions)
-    return DenseIntervalOMaxWorkspace(budget, scratch, perm, actions)
+    return DenseIntervalOMaxWorkspace(budget, scratch, perm)
 end
 
 permutation(ws::DenseIntervalOMaxWorkspace) = ws.permutation
@@ -83,7 +81,7 @@ function ThreadedDenseIntervalOMaxWorkspace(
     perm = Vector{Int32}(undef, num_target(ambiguity_set))
 
     workspaces = [
-        DenseIntervalOMaxWorkspace(budget, scratch, perm, Vector{R}(undef, nactions))
+        DenseIntervalOMaxWorkspace(budget, scratch, perm)
         for _ in 1:Threads.nthreads()
     ]
     return ThreadedDenseIntervalOMaxWorkspace(workspaces)
diff --git a/test/base/bellman.jl b/test/base/bellman.jl
index 9fb1afd..e981d23 100644
--- a/test/base/bellman.jl
+++ b/test/base/bellman.jl
@@ -13,41 +13,41 @@ using IntervalMDP
     @testset "maximization" begin
         ws = IntervalMDP.construct_workspace(prob)
         strategy_cache = IntervalMDP.construct_strategy_cache(prob)
-        Vres = zeros(N, 2)
+        Vres = zeros(N, 1, 2)
         IntervalMDP._expectation_helper!(ws, strategy_cache, Vres, V, prob; upper_bound = true)
-        @test Vres ≈ N[27 // 10, 17 // 10] # [0.3 * 2 + 0.7 * 3, 0.5 * 1 + 0.3 * 2 + 0.2 * 3]
+        @test Vres ≈ N[27 // 10, 17 // 10]' # [0.3 * 2 + 0.7 * 3, 0.5 * 1 + 0.3 * 2 + 0.2 * 3]
 
         ws = IntervalMDP.DenseIntervalOMaxWorkspace(prob, 1)
         strategy_cache = IntervalMDP.construct_strategy_cache(prob)
         Vres = similar(Vres)
         IntervalMDP._expectation_helper!(ws, strategy_cache, Vres, V, prob; upper_bound = true)
-        @test Vres ≈ N[27 // 10, 17 // 10]
+        @test Vres ≈ N[27 // 10, 17 // 10]'
 
         ws = IntervalMDP.ThreadedDenseIntervalOMaxWorkspace(prob, 1)
         strategy_cache = IntervalMDP.construct_strategy_cache(prob)
         Vres = similar(Vres)
         IntervalMDP._expectation_helper!(ws, strategy_cache, Vres, V, prob; upper_bound = true)
-        @test Vres ≈ N[27 // 10, 17 // 10]
+        @test Vres ≈ N[27 // 10, 17 // 10]'
     end
 
     #### Minimization
     @testset "minimization" begin
         ws = IntervalMDP.construct_workspace(prob)
         strategy_cache = IntervalMDP.construct_strategy_cache(prob)
-        Vres = zeros(N, 2)
+        Vres = zeros(N, 1, 2)
         IntervalMDP._expectation_helper!(ws, strategy_cache, Vres, V, prob; upper_bound = false)
-        @test Vres ≈ N[17 // 10, 15 // 10]  # [0.5 * 1 + 0.3 * 2 + 0.2 * 3, 0.6 * 1 + 0.3 * 2 + 0.1 * 3]
+        @test Vres ≈ N[17 // 10, 15 // 10]'  # [0.5 * 1 + 0.3 * 2 + 0.2 * 3, 0.6 * 1 + 0.3 * 2 + 0.1 * 3]
 
         ws = IntervalMDP.DenseIntervalOMaxWorkspace(prob, 1)
         strategy_cache = IntervalMDP.construct_strategy_cache(prob)
         Vres = similar(Vres)
         IntervalMDP._expectation_helper!(ws, strategy_cache, Vres, V, prob; upper_bound = false)
-        @test Vres ≈ N[17 // 10, 15 // 10]
+        @test Vres ≈ N[17 // 10, 15 // 10]'
 
         ws = IntervalMDP.ThreadedDenseIntervalOMaxWorkspace(prob, 1)
         strategy_cache = IntervalMDP.construct_strategy_cache(prob)
         Vres = similar(Vres)
         IntervalMDP._expectation_helper!(ws, strategy_cache, Vres, V, prob; upper_bound = false)
-        @test Vres ≈ N[17 // 10, 15 // 10]
+        @test Vres ≈ N[17 // 10, 15 // 10]'
     end
 end
diff --git a/test/base/imdp.jl b/test/base/imdp.jl
index 7480faa..a3a0e81 100644
--- a/test/base/imdp.jl
+++ b/test/base/imdp.jl
@@ -56,7 +56,7 @@ using IntervalMDP
             1 * 3,
         ]
 
-        Vres = similar(Vres)
+        Vres = Array{N}(undef, (3, 3))
         IntervalMDP.expectation!(Vres, V, mdp; upper_bound = false, maximize = true)
         @test Vres ≈ N[
             (1 // 2) * 1 + (3 // 10) * 2 + (1 // 5) * 3,
@@ -71,7 +71,7 @@ using IntervalMDP
             1 * 3,
         ]
 
-        Vres = similar(Vres)
+        Vres = Array{N}(undef, (3, 3))
         IntervalMDP.expectation!(Vres, V, mdp; upper_bound = true, maximize = false)
         @test Vres ≈ N[
             (1 // 2) * 1 + (3 // 10) * 2 + (1 // 5) * 3,

From 3354b50773a08856e45c9eaa89b9cefea832580a Mon Sep 17 00:00:00 2001
From: JeffersonYeh <formosat1416@icloud.com>
Date: Fri, 13 Mar 2026 00:05:56 +0100
Subject: [PATCH 2/4] rename bellman function to expectation

---
 src/bellman.jl           | 6 +++---
 test/base/factored.jl    | 4 ++--
 test/base/imdp.jl        | 4 ++--
 test/base/product.jl     | 8 ++++----
 test/cuda/dense/imdp.jl  | 2 +-
 test/cuda/sparse/imdp.jl | 2 +-
 test/sparse/factored.jl  | 4 ++--
 test/sparse/imdp.jl      | 2 +-
 8 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/src/bellman.jl b/src/bellman.jl
index 45822e4..a1a81c8 100644
--- a/src/bellman.jl
+++ b/src/bellman.jl
@@ -1,5 +1,5 @@
 """
-    bellman(V, model; upper_bound = false, maximize = true)
+    expectation(V, model; upper_bound = false, maximize = true)
 
 Compute robust Bellman update with the value function `V` and the model `model`, e.g. [`IntervalMarkovDecisionProcess`](@ref),
 that upper or lower bounds the expectation of the value function `V`.
@@ -56,7 +56,7 @@ istates = [Int32(1)]
 model = IntervalMarkovDecisionProcess(transition_probs, istates)
 
 Vprev = [1.0, 2.0, 3.0]
-Vcur = IntervalMDP.bellman(Vprev, model; upper_bound = false)
+Vcur = IntervalMDP.expectation(Vprev, model; upper_bound = false)
 
 # output
 
@@ -71,7 +71,7 @@ Vcur = IntervalMDP.bellman(Vprev, model; upper_bound = false)
     For a hot-loop, it is more efficient to use `bellman!` and pass in pre-allocated objects.
 
 """
-function bellman(
+function expectation(
     V,
     model,
     alg::BellmanAlgorithm = default_bellman_algorithm(model);
diff --git a/test/base/factored.jl b/test/base/factored.jl
index 1fb5264..526e0b3 100644
--- a/test/base/factored.jl
+++ b/test/base/factored.jl
@@ -424,7 +424,7 @@ end
         end
 
         @testset "maximization" begin
-            Vexpected = IntervalMDP.bellman(V, imc; upper_bound = true) # Using O-maximization, should be equivalent
+            Vexpected = IntervalMDP.expectation(V, imc; upper_bound = true) # Using O-maximization, should be equivalent
 
             ws = IntervalMDP.construct_workspace(imc, LPMcCormickRelaxation())
             strategy_cache = IntervalMDP.construct_strategy_cache(imc)
@@ -468,7 +468,7 @@ end
         end
 
         @testset "minimization" begin
-            Vexpected = IntervalMDP.bellman(V, imc; upper_bound = false) # Using O-maximization, should be equivalent
+            Vexpected = IntervalMDP.expectation(V, imc; upper_bound = false) # Using O-maximization, should be equivalent
 
             ws = IntervalMDP.construct_workspace(imc, LPMcCormickRelaxation())
             strategy_cache = IntervalMDP.construct_strategy_cache(imc)
diff --git a/test/base/imdp.jl b/test/base/imdp.jl
index a3a0e81..c81723c 100644
--- a/test/base/imdp.jl
+++ b/test/base/imdp.jl
@@ -49,7 +49,7 @@ using IntervalMDP
     @testset "bellman" begin
         V = N[1, 2, 3]
 
-        Vres = IntervalMDP.bellman(V, mdp; upper_bound = false, maximize = true)
+        Vres = IntervalMDP.expectation(V, mdp; upper_bound = false, maximize = true)
         @test Vres ≈ N[
             (1 // 2) * 1 + (3 // 10) * 2 + (1 // 5) * 3,
             (3 // 10) * 1 + (3 // 10) * 2 + (2 // 5) * 3,
@@ -64,7 +64,7 @@ using IntervalMDP
             1 * 3,
         ]
 
-        Vres = IntervalMDP.bellman(V, mdp; upper_bound = true, maximize = false)
+        Vres = IntervalMDP.expectation(V, mdp; upper_bound = true, maximize = false)
         @test Vres ≈ N[
             (1 // 2) * 1 + (3 // 10) * 2 + (1 // 5) * 3,
             (1 // 5) * 1 + (2 // 5) * 2 + (2 // 5) * 3,
diff --git a/test/base/product.jl b/test/base/product.jl
index 77edbfd..bd7b62a 100644
--- a/test/base/product.jl
+++ b/test/base/product.jl
@@ -137,7 +137,7 @@ end
                 0 5
             ]
 
-            Vres = IntervalMDP.bellman(V, prod_proc; upper_bound = false)
+            Vres = IntervalMDP.expectation(V, prod_proc; upper_bound = false)
 
             @test Vres ≈ N[
                 30//10 24//10
@@ -220,7 +220,7 @@ end
             ]
 
             # No Strategy
-            Vres = IntervalMDP.bellman(V, prod_proc; upper_bound = false)
+            Vres = IntervalMDP.expectation(V, prod_proc; upper_bound = false)
             @test Vtar ≈ Vres atol=eps
 
             # Non Stationary Strategy (Init iteration)
@@ -421,7 +421,7 @@ end
                 45//10 5
             ]
 
-            Vres = IntervalMDP.bellman(V, prod_proc; upper_bound = false)
+            Vres = IntervalMDP.expectation(V, prod_proc; upper_bound = false)
             @test Vres ≈ Vtar
         end
     end
@@ -504,7 +504,7 @@ end
             ]
 
             # No Strategy
-            Vres = IntervalMDP.bellman(V, prod_proc; upper_bound = false)
+            Vres = IntervalMDP.expectation(V, prod_proc; upper_bound = false)
             @test Vtar ≈ Vres atol=eps
 
             # Non Stationary Strategy
diff --git a/test/cuda/dense/imdp.jl b/test/cuda/dense/imdp.jl
index d70f748..67bb4e0 100644
--- a/test/cuda/dense/imdp.jl
+++ b/test/cuda/dense/imdp.jl
@@ -48,7 +48,7 @@ using IntervalMDP, CUDA
 
     @testset "bellman" begin
         V = IntervalMDP.cu(N[1, 2, 3])
-        Vres = IntervalMDP.bellman(V, mdp; upper_bound = false, maximize = true)
+        Vres = IntervalMDP.expectation(V, mdp; upper_bound = false, maximize = true)
         Vres = IntervalMDP.cpu(Vres)  # Convert to CPU for testing
         @test Vres ≈ N[
             (1 // 2) * 1 + (3 // 10) * 2 + (1 // 5) * 3,
diff --git a/test/cuda/sparse/imdp.jl b/test/cuda/sparse/imdp.jl
index a68301b..39fd616 100644
--- a/test/cuda/sparse/imdp.jl
+++ b/test/cuda/sparse/imdp.jl
@@ -51,7 +51,7 @@ using IntervalMDP, CUDA, SparseArrays
 
     @testset "bellman" begin
         V = IntervalMDP.cu(N[1, 2, 3])
-        Vres = IntervalMDP.bellman(V, mdp; upper_bound = false, maximize = true)
+        Vres = IntervalMDP.expectation(V, mdp; upper_bound = false, maximize = true)
         Vres = IntervalMDP.cpu(Vres)  # Convert to CPU for testing
         @test Vres ≈ N[
             (1 // 2) * 1 + (3 // 10) * 2 + (1 // 5) * 3,
diff --git a/test/sparse/factored.jl b/test/sparse/factored.jl
index 7291c6f..4baa401 100644
--- a/test/sparse/factored.jl
+++ b/test/sparse/factored.jl
@@ -90,7 +90,7 @@ end
         end
 
         @testset "maximization" begin
-            Vexpected = IntervalMDP.bellman(V, imc; upper_bound = true) # Using O-maximization, should be equivalent
+            Vexpected = IntervalMDP.expectation(V, imc; upper_bound = true) # Using O-maximization, should be equivalent
 
             ws = IntervalMDP.construct_workspace(imc, LPMcCormickRelaxation())
             strategy_cache = IntervalMDP.construct_strategy_cache(imc)
@@ -134,7 +134,7 @@ end
         end
 
         @testset "minimization" begin
-            Vexpected = IntervalMDP.bellman(V, imc; upper_bound = false) # Using O-maximization, should be equivalent
+            Vexpected = IntervalMDP.expectation(V, imc; upper_bound = false) # Using O-maximization, should be equivalent
 
             ws = IntervalMDP.construct_workspace(imc, LPMcCormickRelaxation())
             strategy_cache = IntervalMDP.construct_strategy_cache(imc)
diff --git a/test/sparse/imdp.jl b/test/sparse/imdp.jl
index 9b5ac4d..63ff30a 100644
--- a/test/sparse/imdp.jl
+++ b/test/sparse/imdp.jl
@@ -51,7 +51,7 @@ using IntervalMDP, SparseArrays
 
     @testset "bellman" begin
         V = N[1, 2, 3]
-        Vres = IntervalMDP.bellman(V, mdp; upper_bound = false, maximize = true)
+        Vres = IntervalMDP.expectation(V, mdp; upper_bound = false, maximize = true)
         @test Vres ≈ N[
             (1 // 2) * 1 + (3 // 10) * 2 + (1 // 5) * 3,
             (3 // 10) * 1 + (3 // 10) * 2 + (2 // 5) * 3,

From 4a3c494cef0059b796f6f3b589f21176989e5990 Mon Sep 17 00:00:00 2001
From: JeffersonYeh <formosat1416@icloud.com>
Date: Wed, 18 Mar 2026 11:01:52 +0100
Subject: [PATCH 3/4] speed performance improvements + minor documentation
 update

---
 src/robust_value_iteration.jl | 4 ++--
 src/strategy_cache.jl         | 4 ++--
 src/value.jl                  | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/robust_value_iteration.jl b/src/robust_value_iteration.jl
index 252099c..6012aab 100644
--- a/src/robust_value_iteration.jl
+++ b/src/robust_value_iteration.jl
@@ -219,7 +219,7 @@ function bellman_update!(workspace, strategy_cache, value_function::StateValueFu
         prop = system_property(spec),
     )
 
-    # 2. extract strategy and compute V(s) = max_a Q(s, a)
+    # 2. extract strategy and compute V'(s) = max_a Q(s, a)
     strategy!(
         select_strategy_cache(strategy_cache, k),
         value_function.current,
@@ -228,7 +228,7 @@ function bellman_update!(workspace, strategy_cache, value_function::StateValueFu
         ismaximize(spec),
     )
 
-    # 3. post process
+    # 3. post process to compute V(s) = g(s, V'(s)) where the definition of g depends on the objective
     step_postprocess_value_function!(value_function, spec)
     step_postprocess_strategy_cache!(strategy_cache)
 end
diff --git a/src/strategy_cache.jl b/src/strategy_cache.jl
index 209dc40..a8512c7 100644
--- a/src/strategy_cache.jl
+++ b/src/strategy_cache.jl
@@ -170,11 +170,11 @@ function strategy!(
 ) where {R <: Real}
 
     #TODO: can be threaded?
-    for jₛ in CartesianIndices(source_shape(model))
+    @inbounds for jₛ in CartesianIndices(source_shape(model))
 
         Vres[jₛ] = extract_strategy!(
             strategy_cache,
-            Q[:, jₛ],
+            @view(Q[:, jₛ]),
             available(model, jₛ),
             jₛ,
             maximize,
diff --git a/src/value.jl b/src/value.jl
index d0613b6..314236a 100644
--- a/src/value.jl
+++ b/src/value.jl
@@ -14,7 +14,7 @@ function StateValueFunction(problem::AbstractIntervalMDPProblem)
 
     dim = (action_values(mp)..., state_values(mp)...)
     # concat gives shape: (a1, a2) , (s1, s2) => (a1, a2, s1, s2)
-    # (a, s) to access s more frequently due to column major
+    # (a, s) to access a more frequently due to column major
     # TODO: works for IMDP, need to check for fIMDP
     intermediate_state_action_value = arrayfactory(mp, valuetype(mp), dim)
     intermediate_state_action_value .= zero(valuetype(mp))

From 45642f18f10bf86045b9e3da0e1884e2d31ed40d Mon Sep 17 00:00:00 2001
From: JeffersonYeh <formosat1416@icloud.com>
Date: Wed, 18 Mar 2026 14:05:14 +0100
Subject: [PATCH 4/4] Fix IMDP tests

---
 src/bellman.jl       |  2 +-
 test/base/imdp.jl    | 24 ++++++++++--------------
 test/base/product.jl |  5 ++++-
 3 files changed, 15 insertions(+), 16 deletions(-)

diff --git a/src/bellman.jl b/src/bellman.jl
index a1a81c8..fa214c4 100644
--- a/src/bellman.jl
+++ b/src/bellman.jl
@@ -79,7 +79,7 @@ function expectation(
     maximize = true,
     prop = nothing,
 )
-    Vres = Array{eltype(V)}(undef, (action_values(model)..., state_values(model)...))
+    Vres = Array{eltype(V)}(undef, (action_values(model)..., size(V)...))
 
     return expectation!(
         Vres,
diff --git a/test/base/imdp.jl b/test/base/imdp.jl
index c81723c..b097171 100644
--- a/test/base/imdp.jl
+++ b/test/base/imdp.jl
@@ -51,32 +51,28 @@ using IntervalMDP
 
         Vres = IntervalMDP.expectation(V, mdp; upper_bound = false, maximize = true)
         @test Vres ≈ N[
-            (1 // 2) * 1 + (3 // 10) * 2 + (1 // 5) * 3,
-            (3 // 10) * 1 + (3 // 10) * 2 + (2 // 5) * 3,
-            1 * 3,
+            17//10 18//10 3//1
+            15//10 21//10 3//1
         ]
 
-        Vres = Array{N}(undef, (3, 3))
+        Vres = Array{N}(undef, (2, 3))
         IntervalMDP.expectation!(Vres, V, mdp; upper_bound = false, maximize = true)
         @test Vres ≈ N[
-            (1 // 2) * 1 + (3 // 10) * 2 + (1 // 5) * 3,
-            (3 // 10) * 1 + (3 // 10) * 2 + (2 // 5) * 3,
-            1 * 3,
+            17//10 18//10 3//1
+            15//10 21//10 3//1
         ]
 
         Vres = IntervalMDP.expectation(V, mdp; upper_bound = true, maximize = false)
         @test Vres ≈ N[
-            (1 // 2) * 1 + (3 // 10) * 2 + (1 // 5) * 3,
-            (1 // 5) * 1 + (2 // 5) * 2 + (2 // 5) * 3,
-            1 * 3,
+            27//10 23//10 3//1
+            17//10 22//10 3//1
         ]
 
-        Vres = Array{N}(undef, (3, 3))
+        Vres = Array{N}(undef, (2, 3))
         IntervalMDP.expectation!(Vres, V, mdp; upper_bound = true, maximize = false)
         @test Vres ≈ N[
-            (1 // 2) * 1 + (3 // 10) * 2 + (1 // 5) * 3,
-            (1 // 5) * 1 + (2 // 5) * 2 + (2 // 5) * 3,
-            1 * 3,
+            27//10 23//10 3//1
+            17//10 22//10 3//1
         ]
     end
 
diff --git a/test/base/product.jl b/test/base/product.jl
index bd7b62a..cbcec31 100644
--- a/test/base/product.jl
+++ b/test/base/product.jl
@@ -139,11 +139,14 @@ end
 
             Vres = IntervalMDP.expectation(V, prod_proc; upper_bound = false)
 
-            @test Vres ≈ N[
+            Vtar = N[
                 30//10 24//10
                 33//10 2
                 5 5
             ]
+            Vtar = reshape(Vtar, 1, size(Vtar)...)
+
+            @test Vres ≈ Vtar
         end
     end
 end