Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 5 additions & 13 deletions src/bellman.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
bellman(V, model; upper_bound = false, maximize = true)
expectation(V, model; upper_bound = false, maximize = true)

Compute robust Bellman update with the value function `V` and the model `model`, e.g. [`IntervalMarkovDecisionProcess`](@ref),
that upper or lower bounds the expectation of the value function `V`.
Expand Down Expand Up @@ -56,7 +56,7 @@ istates = [Int32(1)]
model = IntervalMarkovDecisionProcess(transition_probs, istates)

Vprev = [1.0, 2.0, 3.0]
Vcur = IntervalMDP.bellman(Vprev, model; upper_bound = false)
Vcur = IntervalMDP.expectation(Vprev, model; upper_bound = false)

# output

Expand All @@ -71,15 +71,15 @@ Vcur = IntervalMDP.bellman(Vprev, model; upper_bound = false)
For a hot-loop, it is more efficient to use `expectation!` and pass in pre-allocated objects.

"""
function bellman(
function expectation(
V,
model,
alg::BellmanAlgorithm = default_bellman_algorithm(model);
upper_bound = false,
maximize = true,
prop = nothing,
)
Vres = similar(V, source_shape(model))
Vres = Array{eltype(V)}(undef, (action_values(model)..., state_values(model)...))

return expectation!(
Vres,
Expand Down Expand Up @@ -451,17 +451,9 @@ Base.@propagate_inbounds function state_expectation!(
for jₐ in available(model, jₛ)
ambiguity_set = marginal[jₐ, jₛ]
budget = workspace.budget[sub2ind(marginal, jₐ, jₛ)]
workspace.actions[jₐ] =
Vres[jₐ, jₛ] =
state_action_expectation(workspace, V, ambiguity_set, budget, upper_bound)
end

Vres[jₛ] = extract_strategy!(
strategy_cache,
workspace.actions,
available(model, jₛ),
jₛ,
maximize,
)
end


Expand Down
30 changes: 29 additions & 1 deletion src/robust_value_iteration.jl
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,35 @@ function _value_iteration!(problem::AbstractIntervalMDPProblem, alg; callback =
return value_function.current, k, value_function.previous, strategy_cache
end

function bellman_update!(workspace, strategy_cache, value_function, k, mp, spec)
"""
    bellman_update!(workspace, strategy_cache, value_function, k, mp, spec)

Perform one robust Bellman update for a `StateValueFunction`: compute the
state-action values Q(s, a), extract the (max/min) strategy to obtain
V'(s), and post-process according to the specification.
"""
function bellman_update!(workspace, strategy_cache, value_function::StateValueFunction, k, mp, spec)

    # 1. compute expectation for Q(s, a)
    expectation!(
        workspace,
        select_strategy_cache(strategy_cache, k),
        value_function.intermediate_state_action_value,
        value_function.previous,
        select_model(mp, k); # For time-varying available and labelling functions
        upper_bound = isoptimistic(spec),
        maximize = ismaximize(spec),
        prop = system_property(spec),
    )

    # 2. extract strategy and compute V'(s) = max_a Q(s, a)
    strategy!(
        select_strategy_cache(strategy_cache, k),
        value_function.current,
        value_function.intermediate_state_action_value,
        select_model(mp, k),
        ismaximize(spec),
    )

    # 3. post process to compute V(s) = g(s, V'(s)) where the definition of g depends on the objective
    step_postprocess_value_function!(value_function, spec)
    step_postprocess_strategy_cache!(strategy_cache)
end

function bellman_update!(workspace, strategy_cache::NonOptimizingStrategyCache, value_function::StateValueFunction, k, mp, spec)
expectation!(
workspace,
select_strategy_cache(strategy_cache, k),
Expand Down
23 changes: 23 additions & 0 deletions src/strategy_cache.jl
Original file line number Diff line number Diff line change
Expand Up @@ -159,3 +159,26 @@ function _extract_strategy!(cur_strategy, values, available_actions, neutral, j
@inbounds cur_strategy[jₛ] = opt_index
return opt_val
end


"""
    strategy!(strategy_cache, Vres, Q, model, maximize)

Extract a strategy from the state-action values `Q` (laid out with action
dimensions first, then state dimensions) and store the resulting state
values in `Vres`, i.e. `Vres[s] = opt_a Q[a, s]` where `opt` is presumably
max or min depending on `maximize` (delegated to `extract_strategy!`).
"""
function strategy!(
    strategy_cache::OptimizingStrategyCache,
    Vres::AbstractArray{R},
    Q::AbstractArray{R},
    model,
    maximize,
) where {R <: Real}

    # TODO: can be threaded? Left single-threaded for now.
    @inbounds for jₛ in CartesianIndices(source_shape(model))
        # Use a view to avoid allocating a copy of the action values for
        # every state (many small allocations in the hot loop).
        Vres[jₛ] = extract_strategy!(
            strategy_cache,
            @view(Q[:, jₛ]),
            available(model, jₛ),
            jₛ,
            maximize,
        )
    end
    return nothing
end
6 changes: 3 additions & 3 deletions src/value.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ function StateValueFunction(problem::AbstractIntervalMDPProblem)
previous .= zero(valuetype(mp))
current = copy(previous)

dim = Tuple(Iterators.flatten(zip(action_values(mp), state_values(mp))))
# interleaved concat gives shape: (a1, a2) , (s1, s2) => (a1, s1, a2, s2)
dim = (action_values(mp)..., state_values(mp)...)
# concat gives shape: (a1, a2) , (s1, s2) => (a1, a2, s1, s2)
# (a, s) to access a more frequently due to column major
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# (a, s) to access s more frequently due to column major
# (a, s) to access a more frequently due to column major

# TODO: works for IMDP, need to check for fIMDP
intermediate_state_action_value = arrayfactory(mp, valuetype(mp), dim)
Expand Down Expand Up @@ -45,7 +45,7 @@ end

function StateActionValueFunction(problem::AbstractIntervalMDPProblem)
mp = system(problem)
dim = Tuple(Iterators.flatten(zip(action_values(mp), state_values(mp))))
dim = (action_values(mp)..., state_values(mp)...)
# TODO: works for IMDP, need to check for fIMDP
previous = arrayfactory(mp, valuetype(mp), dim)
previous .= zero(valuetype(mp))
Expand Down
6 changes: 2 additions & 4 deletions src/workspace.jl
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@ struct DenseIntervalOMaxWorkspace{T <: Real}
budget::Vector{T}
scratch::Vector{Int32}
permutation::Vector{Int32}
actions::Vector{T}
end

function DenseIntervalOMaxWorkspace(
Expand All @@ -63,8 +62,7 @@ function DenseIntervalOMaxWorkspace(
budget = 1 .- vec(sum(ambiguity_set.lower; dims = 1))
scratch = Vector{Int32}(undef, num_target(ambiguity_set))
perm = Vector{Int32}(undef, num_target(ambiguity_set))
actions = Vector{R}(undef, nactions)
return DenseIntervalOMaxWorkspace(budget, scratch, perm, actions)
return DenseIntervalOMaxWorkspace(budget, scratch, perm)
end

permutation(ws::DenseIntervalOMaxWorkspace) = ws.permutation
Expand All @@ -83,7 +81,7 @@ function ThreadedDenseIntervalOMaxWorkspace(
perm = Vector{Int32}(undef, num_target(ambiguity_set))

workspaces = [
DenseIntervalOMaxWorkspace(budget, scratch, perm, Vector{R}(undef, nactions))
DenseIntervalOMaxWorkspace(budget, scratch, perm)
for _ in 1:Threads.nthreads()
]
return ThreadedDenseIntervalOMaxWorkspace(workspaces)
Expand Down
16 changes: 8 additions & 8 deletions test/base/bellman.jl
Original file line number Diff line number Diff line change
Expand Up @@ -13,41 +13,41 @@ using IntervalMDP
@testset "maximization" begin
ws = IntervalMDP.construct_workspace(prob)
strategy_cache = IntervalMDP.construct_strategy_cache(prob)
Vres = zeros(N, 2)
Vres = zeros(N, 1, 2)
IntervalMDP._expectation_helper!(ws, strategy_cache, Vres, V, prob; upper_bound = true)
@test Vres ≈ N[27 // 10, 17 // 10] # [0.3 * 2 + 0.7 * 3, 0.5 * 1 + 0.3 * 2 + 0.2 * 3]
@test Vres ≈ N[27 // 10, 17 // 10]' # [0.3 * 2 + 0.7 * 3, 0.5 * 1 + 0.3 * 2 + 0.2 * 3]

ws = IntervalMDP.DenseIntervalOMaxWorkspace(prob, 1)
strategy_cache = IntervalMDP.construct_strategy_cache(prob)
Vres = similar(Vres)
IntervalMDP._expectation_helper!(ws, strategy_cache, Vres, V, prob; upper_bound = true)
@test Vres ≈ N[27 // 10, 17 // 10]
@test Vres ≈ N[27 // 10, 17 // 10]'

ws = IntervalMDP.ThreadedDenseIntervalOMaxWorkspace(prob, 1)
strategy_cache = IntervalMDP.construct_strategy_cache(prob)
Vres = similar(Vres)
IntervalMDP._expectation_helper!(ws, strategy_cache, Vres, V, prob; upper_bound = true)
@test Vres ≈ N[27 // 10, 17 // 10]
@test Vres ≈ N[27 // 10, 17 // 10]'
end

#### Minimization
@testset "minimization" begin
ws = IntervalMDP.construct_workspace(prob)
strategy_cache = IntervalMDP.construct_strategy_cache(prob)
Vres = zeros(N, 2)
Vres = zeros(N, 1, 2)
IntervalMDP._expectation_helper!(ws, strategy_cache, Vres, V, prob; upper_bound = false)
@test Vres ≈ N[17 // 10, 15 // 10] # [0.5 * 1 + 0.3 * 2 + 0.2 * 3, 0.6 * 1 + 0.3 * 2 + 0.1 * 3]
@test Vres ≈ N[17 // 10, 15 // 10]' # [0.5 * 1 + 0.3 * 2 + 0.2 * 3, 0.6 * 1 + 0.3 * 2 + 0.1 * 3]

ws = IntervalMDP.DenseIntervalOMaxWorkspace(prob, 1)
strategy_cache = IntervalMDP.construct_strategy_cache(prob)
Vres = similar(Vres)
IntervalMDP._expectation_helper!(ws, strategy_cache, Vres, V, prob; upper_bound = false)
@test Vres ≈ N[17 // 10, 15 // 10]
@test Vres ≈ N[17 // 10, 15 // 10]'

ws = IntervalMDP.ThreadedDenseIntervalOMaxWorkspace(prob, 1)
strategy_cache = IntervalMDP.construct_strategy_cache(prob)
Vres = similar(Vres)
IntervalMDP._expectation_helper!(ws, strategy_cache, Vres, V, prob; upper_bound = false)
@test Vres ≈ N[17 // 10, 15 // 10]
@test Vres ≈ N[17 // 10, 15 // 10]'
end
end
4 changes: 2 additions & 2 deletions test/base/factored.jl
Original file line number Diff line number Diff line change
Expand Up @@ -424,7 +424,7 @@ end
end

@testset "maximization" begin
Vexpected = IntervalMDP.bellman(V, imc; upper_bound = true) # Using O-maximization, should be equivalent
Vexpected = IntervalMDP.expectation(V, imc; upper_bound = true) # Using O-maximization, should be equivalent

ws = IntervalMDP.construct_workspace(imc, LPMcCormickRelaxation())
strategy_cache = IntervalMDP.construct_strategy_cache(imc)
Expand Down Expand Up @@ -468,7 +468,7 @@ end
end

@testset "minimization" begin
Vexpected = IntervalMDP.bellman(V, imc; upper_bound = false) # Using O-maximization, should be equivalent
Vexpected = IntervalMDP.expectation(V, imc; upper_bound = false) # Using O-maximization, should be equivalent

ws = IntervalMDP.construct_workspace(imc, LPMcCormickRelaxation())
strategy_cache = IntervalMDP.construct_strategy_cache(imc)
Expand Down
8 changes: 4 additions & 4 deletions test/base/imdp.jl
Original file line number Diff line number Diff line change
Expand Up @@ -49,29 +49,29 @@ using IntervalMDP
@testset "bellman" begin
V = N[1, 2, 3]

Vres = IntervalMDP.bellman(V, mdp; upper_bound = false, maximize = true)
Vres = IntervalMDP.expectation(V, mdp; upper_bound = false, maximize = true)
@test Vres ≈ N[
(1 // 2) * 1 + (3 // 10) * 2 + (1 // 5) * 3,
(3 // 10) * 1 + (3 // 10) * 2 + (2 // 5) * 3,
1 * 3,
]

Vres = similar(Vres)
Vres = Array{N}(undef, (3, 3))
IntervalMDP.expectation!(Vres, V, mdp; upper_bound = false, maximize = true)
@test Vres ≈ N[
(1 // 2) * 1 + (3 // 10) * 2 + (1 // 5) * 3,
(3 // 10) * 1 + (3 // 10) * 2 + (2 // 5) * 3,
1 * 3,
]

Vres = IntervalMDP.bellman(V, mdp; upper_bound = true, maximize = false)
Vres = IntervalMDP.expectation(V, mdp; upper_bound = true, maximize = false)
@test Vres ≈ N[
(1 // 2) * 1 + (3 // 10) * 2 + (1 // 5) * 3,
(1 // 5) * 1 + (2 // 5) * 2 + (2 // 5) * 3,
1 * 3,
]

Vres = similar(Vres)
Vres = Array{N}(undef, (3, 3))
IntervalMDP.expectation!(Vres, V, mdp; upper_bound = true, maximize = false)
@test Vres ≈ N[
(1 // 2) * 1 + (3 // 10) * 2 + (1 // 5) * 3,
Expand Down
8 changes: 4 additions & 4 deletions test/base/product.jl
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ end
0 5
]

Vres = IntervalMDP.bellman(V, prod_proc; upper_bound = false)
Vres = IntervalMDP.expectation(V, prod_proc; upper_bound = false)

@test Vres ≈ N[
30//10 24//10
Expand Down Expand Up @@ -220,7 +220,7 @@ end
]

# No Strategy
Vres = IntervalMDP.bellman(V, prod_proc; upper_bound = false)
Vres = IntervalMDP.expectation(V, prod_proc; upper_bound = false)
@test Vtar ≈ Vres atol=eps

# Non Stationary Strategy (Init iteration)
Expand Down Expand Up @@ -421,7 +421,7 @@ end
45//10 5
]

Vres = IntervalMDP.bellman(V, prod_proc; upper_bound = false)
Vres = IntervalMDP.expectation(V, prod_proc; upper_bound = false)
@test Vres ≈ Vtar
end
end
Expand Down Expand Up @@ -504,7 +504,7 @@ end
]

# No Strategy
Vres = IntervalMDP.bellman(V, prod_proc; upper_bound = false)
Vres = IntervalMDP.expectation(V, prod_proc; upper_bound = false)
@test Vtar ≈ Vres atol=eps

# Non Stationary Strategy
Expand Down
2 changes: 1 addition & 1 deletion test/cuda/dense/imdp.jl
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ using IntervalMDP, CUDA

@testset "bellman" begin
V = IntervalMDP.cu(N[1, 2, 3])
Vres = IntervalMDP.bellman(V, mdp; upper_bound = false, maximize = true)
Vres = IntervalMDP.expectation(V, mdp; upper_bound = false, maximize = true)
Vres = IntervalMDP.cpu(Vres) # Convert to CPU for testing
@test Vres ≈ N[
(1 // 2) * 1 + (3 // 10) * 2 + (1 // 5) * 3,
Expand Down
2 changes: 1 addition & 1 deletion test/cuda/sparse/imdp.jl
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ using IntervalMDP, CUDA, SparseArrays

@testset "bellman" begin
V = IntervalMDP.cu(N[1, 2, 3])
Vres = IntervalMDP.bellman(V, mdp; upper_bound = false, maximize = true)
Vres = IntervalMDP.expectation(V, mdp; upper_bound = false, maximize = true)
Vres = IntervalMDP.cpu(Vres) # Convert to CPU for testing
@test Vres ≈ N[
(1 // 2) * 1 + (3 // 10) * 2 + (1 // 5) * 3,
Expand Down
4 changes: 2 additions & 2 deletions test/sparse/factored.jl
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ end
end

@testset "maximization" begin
Vexpected = IntervalMDP.bellman(V, imc; upper_bound = true) # Using O-maximization, should be equivalent
Vexpected = IntervalMDP.expectation(V, imc; upper_bound = true) # Using O-maximization, should be equivalent

ws = IntervalMDP.construct_workspace(imc, LPMcCormickRelaxation())
strategy_cache = IntervalMDP.construct_strategy_cache(imc)
Expand Down Expand Up @@ -134,7 +134,7 @@ end
end

@testset "minimization" begin
Vexpected = IntervalMDP.bellman(V, imc; upper_bound = false) # Using O-maximization, should be equivalent
Vexpected = IntervalMDP.expectation(V, imc; upper_bound = false) # Using O-maximization, should be equivalent

ws = IntervalMDP.construct_workspace(imc, LPMcCormickRelaxation())
strategy_cache = IntervalMDP.construct_strategy_cache(imc)
Expand Down
2 changes: 1 addition & 1 deletion test/sparse/imdp.jl
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ using IntervalMDP, SparseArrays

@testset "bellman" begin
V = N[1, 2, 3]
Vres = IntervalMDP.bellman(V, mdp; upper_bound = false, maximize = true)
Vres = IntervalMDP.expectation(V, mdp; upper_bound = false, maximize = true)
@test Vres ≈ N[
(1 // 2) * 1 + (3 // 10) * 2 + (1 // 5) * 3,
(3 // 10) * 1 + (3 // 10) * 2 + (2 // 5) * 3,
Expand Down
Loading