From 3949a0bd2cb6f04cc4525c6afc8d78ea936972db Mon Sep 17 00:00:00 2001 From: Zachary Sunberg Date: Thu, 21 Jul 2022 17:17:15 -0700 Subject: [PATCH] deprecated in favor of POMDPTools --- Project.toml | 2 + README.md | 17 +- src/POMDPModelTools.jl | 133 +--------- src/common_rl/from_env.jl | 244 ------------------ src/common_rl/to_env.jl | 100 -------- src/convenient_implementations.jl | 11 +- src/deprecated.jl | 2 +- src/distributions/bool.jl | 28 -- src/distributions/deterministic.jl | 19 -- src/distributions/implicit.jl | 45 ---- src/distributions/pretty_printing.jl | 36 --- src/distributions/sparse_cat.jl | 129 ---------- src/distributions/uniform.jl | 68 ----- src/distributions/weighted_iteration.jl | 19 -- src/fully_observable_pomdp.jl | 45 ---- src/generative_belief_mdp.jl | 61 ----- src/info.jl | 35 --- src/matrices.jl | 37 --- src/obs_weight.jl | 11 - src/ordered_spaces.jl | 83 ------ src/policy_evaluation.jl | 84 ------ src/sparse_tabular.jl | 324 ------------------------ src/state_action_reward.jl | 102 -------- src/terminal_state.jl | 20 -- src/underlying_mdp.jl | 40 --- src/visualization.jl | 50 ---- 26 files changed, 23 insertions(+), 1722 deletions(-) delete mode 100644 src/common_rl/from_env.jl delete mode 100644 src/common_rl/to_env.jl delete mode 100644 src/distributions/bool.jl delete mode 100644 src/distributions/deterministic.jl delete mode 100644 src/distributions/implicit.jl delete mode 100644 src/distributions/pretty_printing.jl delete mode 100644 src/distributions/sparse_cat.jl delete mode 100644 src/distributions/uniform.jl delete mode 100644 src/distributions/weighted_iteration.jl delete mode 100644 src/fully_observable_pomdp.jl delete mode 100644 src/generative_belief_mdp.jl delete mode 100644 src/info.jl delete mode 100644 src/matrices.jl delete mode 100644 src/obs_weight.jl delete mode 100644 src/ordered_spaces.jl delete mode 100644 src/policy_evaluation.jl delete mode 100644 src/sparse_tabular.jl delete mode 100644 src/state_action_reward.jl delete mode 100644 src/terminal_state.jl delete mode 100644 src/underlying_mdp.jl delete mode 100644 src/visualization.jl diff --git a/Project.toml b/Project.toml index daa1ec1..6f88d33 100644 --- a/Project.toml +++ b/Project.toml @@ -8,8 +8,10 @@ CommonRLInterface = "d842c3ba-07a1-494f-bbec-f5741b0a3e98" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" POMDPLinter = "f3bd98c0-eb40-45e2-9eb1-f2763262d755" +POMDPTools = "7588e00f-9cae-40de-98dc-e0c70c48cdd7" POMDPs = "a93abf59-7444-517b-a68a-c42f96afdd7d" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +Reexport = "189a3867-3050-52da-a836-e630ba90ab69" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" Tricks = "410a4b4d-49e4-4fbc-ab6d-cb71b17b3775" diff --git a/README.md b/README.md index 38b0f6b..dcfde5e 100644 --- a/README.md +++ b/README.md @@ -1,16 +1,3 @@ -# POMDPModelTools +# ~~POMDPModelTools~~ -[![Build Status](https://travis-ci.org/JuliaPOMDP/POMDPModelTools.jl.svg?branch=master)](https://travis-ci.org/JuliaPOMDP/POMDPModelTools.jl) -[![Coverage Status](https://coveralls.io/repos/github/JuliaPOMDP/POMDPModelTools.jl/badge.svg?branch=master)](https://coveralls.io/github/JuliaPOMDP/POMDPModelTools.jl?branch=master) -[![Stable Docs](https://img.shields.io/badge/docs-stable-blue.svg)](https://JuliaPOMDP.github.io/POMDPModelTools.jl/stable) -[![Dev Docs](https://img.shields.io/badge/docs-dev-blue.svg)](https://JuliaPOMDP.github.io/POMDPModelTools.jl/dev) - 
-Support tools for writing and working with [POMDPs.jl](github.com/JuliaPOMDP/POMDPs.jl) models and solvers. - -Please read the documentation for a list of tools: [https://JuliaPOMDP.github.io/POMDPModelTools.jl/stable](https://JuliaPOMDP.github.io/POMDPModelTools.jl/stable). If that link does not work, look at the development branch: [https://JuliaPOMDP.github.io/POMDPModelTools.jl/dev](https://JuliaPOMDP.github.io/POMDPModelTools.jl/dev) - -## Installation - -```julia-repl -julia> using Pkg; Pkg.add("POMDPModelTools") -``` +POMDPModelTools is deprecated! Use [POMDPTools](https://github.com/JuliaPOMDP/POMDPs.jl/tree/master/lib/POMDPTools) instead. diff --git a/src/POMDPModelTools.jl b/src/POMDPModelTools.jl index 3df2ac3..7df74ff 100644 --- a/src/POMDPModelTools.jl +++ b/src/POMDPModelTools.jl @@ -1,133 +1,28 @@ module POMDPModelTools -using POMDPs -using Random -using LinearAlgebra -using SparseArrays -using UnicodePlots -import CommonRLInterface -using Tricks: static_hasmethod +Base.depwarn(""" + Functionality from POMDPModelTools has been moved to POMDPTools. -import POMDPs: actions, actionindex -import POMDPs: states, stateindex -import POMDPs: observations, obsindex -import POMDPs: initialstate, isterminal, discount -import POMDPs: implemented -import Distributions: pdf, mode, mean, support -import Random: rand, rand! -import Statistics: mean -import Base: == + Simply replace `using POMDPModelTools` with `using POMDPTools`. + """, :POMDPModelTools) -import POMDPLinter: @POMDP_require +using Reexport +using Random: AbstractRNG +import POMDPs -export - render -include("visualization.jl") - -# info interface -export - action_info, - solve_info, - update_info -include("info.jl") - -export - ordered_states, - ordered_actions, - ordered_observations -include("ordered_spaces.jl") - -export - TerminalState, - terminalstate -include("terminal_state.jl") - -export GenerativeBeliefMDP -include("generative_belief_mdp.jl") - -export FullyObservablePOMDP -include("fully_observable_pomdp.jl") - -export UnderlyingMDP -include("underlying_mdp.jl") - -export obs_weight -include("obs_weight.jl") - -export - probability_check, - obs_prob_consistency_check, - trans_prob_consistency_check - -export - weighted_iterator -include("distributions/weighted_iteration.jl") - -export - SparseCat -include("distributions/sparse_cat.jl") - -export - BoolDistribution -include("distributions/bool.jl") +import POMDPTools -export - Deterministic -include("distributions/deterministic.jl") +@reexport using POMDPTools.ModelTools +@reexport using POMDPTools.POMDPDistributions +@reexport using POMDPTools.Policies: evaluate +@reexport using POMDPTools.CommonRLIntegration -export - Uniform, - UnsafeUniform -include("distributions/uniform.jl") - -export - ImplicitDistribution -include("distributions/implicit.jl") - -export - StateActionReward, - FunctionSAR, - LazyCachedSAR -include("state_action_reward.jl") +policy_reward_vector = POMDPTools.Policies.policy_reward_vector +mean_reward = POMDPTools.ModelTools.mean_reward # convenient implementations include("convenient_implementations.jl") -export - evaluate -include("policy_evaluation.jl") - -export - showdistribution -include("distributions/pretty_printing.jl") - -export - SparseTabularMDP, - SparseTabularPOMDP, - transition_matrix, - reward_vector, - observation_matrix, - reward_matrix, - observation_matrices -include("sparse_tabular.jl") - -export - transition_matrices, - reward_vectors -include("matrices.jl") - -export - MDPCommonRLEnv, - POMDPCommonRLEnv 
-include("common_rl/to_env.jl") - -export - RLEnvMDP, - RLEnvPOMDP, - OpaqueRLEnvMDP, - OpaqueRLEnvPOMDP -include("common_rl/from_env.jl") - export add_infonode include("deprecated.jl") diff --git a/src/common_rl/from_env.jl b/src/common_rl/from_env.jl deleted file mode 100644 index 7e3e2c7..0000000 --- a/src/common_rl/from_env.jl +++ /dev/null @@ -1,244 +0,0 @@ -abstract type AbstractRLEnvMDP{S, A} <: MDP{S, A} end -abstract type AbstractRLEnvPOMDP{S, A, O} <: POMDP{S, A, O} end - -const AbstractRLEnvProblem = Union{AbstractRLEnvMDP, AbstractRLEnvPOMDP} - -POMDPs.actions(m::AbstractRLEnvProblem) = RL.actions(m.env) -POMDPs.discount(m::AbstractRLEnvProblem) = m.discount - -function POMDPs.actions(m::AbstractRLEnvProblem, s) - if provided(RL.valid_actions, m.env) - old = RL.state(m.env) - RL.setstate!(m.env, s) - va = RL.valid_actions(m.env) - RL.setstate!(m.env, old) - return va - else - return RL.actions(m.env) - end -end - -function POMDPs.initialstate(m::AbstractRLEnvProblem) - return ImplicitDistribution(m) do m, rng - RL.reset!(m.env) - return RL.state(m.env) - end -end - -function POMDPs.isterminal(m::AbstractRLEnvProblem, s) - old = RL.state(m.env) - RL.setstate!(m.env, s) - t = RL.terminated(m.env) - RL.setstate!(m.env, old) - return t -end - -struct RLEnvMDP{E, S, A} <: AbstractRLEnvMDP{S, A} - env::E - discount::Float64 -end - -""" - RLEnvMDP(env; discount=1.0) - -Create an `MDP` by wrapping a `CommonRLInterface.AbstractEnv`. `state` and `setstate!` from `CommonRLInterface` must be provided, and the `POMDPs` generative model functionality will be provided. -""" -function RLEnvMDP(env; discount=1.0) - S = infer_statetype(env) - if S == Any - @warn("State type inferred for $(typeof(env)) by looking at the return type of state(env) was Any. This could cause significant performance degradation.") - end - return RLEnvMDP{typeof(env), S, eltype(RL.actions(env))}(env, discount) -end - -function POMDPs.gen(m::RLEnvMDP, s, a, rng) - # rng is not currently used - RL.setstate!(m.env, s) - r = RL.act!(m.env, a) - sp = RL.state(m.env) - return (sp=sp, r=r) -end - -""" - RLEnvPOMDP(env; discount=1.0) - -Create an `POMDP` by wrapping a `CommonRLInterface.AbstractEnv`. `state` and `setstate!` from `CommonRLInterface` must be provided, and the `POMDPs` generative model functionality will be provided. -""" -struct RLEnvPOMDP{E, S, A, O} <: AbstractRLEnvPOMDP{S, A, O} - env::E - discount::Float64 -end - -function RLEnvPOMDP(env; discount=1.0) - S = infer_statetype(env) - if S == Any - @warn("State type inferred for $(typeof(env)) by looking at the return type of state(env) was Any. This could cause significant performance degradation.") - end - O = infer_obstype(env) - if S == Any - @warn("Observation type inferred for $(typeof(env)) by looking at the return type of observe(env) was Any. This could cause significant performance degradation.") - end - return RLEnvPOMDP{typeof(env), S, eltype(RL.actions(env)), O}(env, discount) -end - - -function POMDPs.gen(m::RLEnvPOMDP, s, a, rng) - RL.setstate!(m.env, s) - r = RL.act!(m.env, a) - sp = RL.state(m.env) - o = RL.observe(m.env) - return (sp=sp, o=o, r=r) -end - -##################### -# Opaque: for when state and setstate! are not implemented -##################### - -struct OpaqueRLEnvState - age::BigInt -end - -mutable struct OpaqueRLEnvMDP{E, A} <: AbstractRLEnvMDP{OpaqueRLEnvState, A} - env::E - age::BigInt - discount::Float64 -end - -""" - OpaqueRLEnvMDP(env; discount=1.0) - -Wrap a `CommonRLInterface.AbstractEnv` in an `MDP` object. 
The state will be an `OpaqueRLEnvState` and only simulation will be supported. -""" -function OpaqueRLEnvMDP(env; discount::Float64=1.0) - return OpaqueRLEnvMDP{typeof(env), eltype(RL.actions(env))}(env, 1, discount) -end - -mutable struct OpaqueRLEnvPOMDP{E, A, O} <: AbstractRLEnvPOMDP{OpaqueRLEnvState, A, O} - env::E - age::BigInt - discount::Float64 -end - -""" - OpaqueRLEnvPOMDP(env; discount=1.0) - -Wrap a `CommonRLInterface.AbstractEnv` in an `POMDP` object. The state will be an `OpaqueRLEnvState` and only simulation will be supported. -""" -function OpaqueRLEnvPOMDP(env, discount=1.0) - return OpaqueRLEnvPOMDP{typeof(env), eltype(RL.actions(env)), typeof(RL.observe(env))}(env, 1, discount) -end - -const OpaqueRLEnvProblem = Union{OpaqueRLEnvMDP, OpaqueRLEnvPOMDP} - -function POMDPs.actions(m::OpaqueRLEnvProblem, s::OpaqueRLEnvState) - if provided(RL.valid_actions, m.env) && s.age == m.age - return RL.valid_actions(m.env) - else - return RL.actions(m.env) - end -end - -function POMDPs.initialstate(m::OpaqueRLEnvProblem) - return ImplicitDistribution(m) do m, rng - RL.reset!(m.env) - m.age += 1 - return OpaqueRLEnvState(m.age) - end -end - -function POMDPs.isterminal(m::OpaqueRLEnvProblem, s::OpaqueRLEnvState) - if s.age != m.age - throw(OpaqueRLEnvState(m, s)) - end - return RL.terminated(m.env) -end - - -function POMDPs.gen(m::OpaqueRLEnvMDP, s::OpaqueRLEnvState, a, rng) - if s.age == m.age - r = RL.act!(m.env, a) - m.age += 1 - return (sp=OpaqueRLEnvState(m.age), r=r) - else - throw(OpaqueRLEnvState(m.env, m.age, s)) - end -end - -function POMDPs.gen(m::OpaqueRLEnvPOMDP, s::OpaqueRLEnvState, a, rng) - if s.age == m.age - r = RL.act!(m.env, a) - o = RL.observe(m.env) - m.age += 1 - return (sp=OpaqueRLEnvState(m.age), o=o, r=r) - else - throw(OpaqueRLEnvState(m.env, m.age, s)) - end -end - -function Base.convert(::Type{POMDP}, env::RL.AbstractEnv) - if RL.provided(RL.state, env) - s = RL.state(env) - if RL.provided(RL.setstate!, env, s) - return RLEnvPOMDP(env) - end - end - return OpaqueRLEnvPOMDP(env) -end - -function Base.convert(::Type{MDP}, env::RL.AbstractEnv) - if RL.provided(RL.state, env) - s = RL.state(env) - if RL.provided(RL.setstate!, env, s) - return RLEnvMDP(env) - end - end - return OpaqueRLEnvMDP(env) -end - -Base.convert(E::Type{<:RL.AbstractEnv}, m::AbstractRLEnvProblem) = convert(E, m.env) -Base.convert(::Type{RL.AbstractEnv}, m::AbstractRLEnvProblem) = m.env - -struct OpaqueRLEnvStateError <: Exception - env - env_age::BigInt - s::OpaqueRLEnvState -end - -function Base.showerror(io::IO, e::OpaqueRLEnvStateError) - print(io, "OpaqueRLEnvStateError: ") - print(io, """An attempt was made to interact with the environment encapsulated in an `OpaqueRLEnv(PO)MDP` at a particular state, but the environment had been stepped forward, so it may be in a different state. - - Enironment age: $(e.env_age) - State age: $(e.s.age) - - Suggestions: provide `CommonRLInterface.state(::$(typeof(e.env)))` and `CommonRLInterface.setstate!(::$(typeof(e.env)), s)` so that the environment can be converted to a `RLEnv(PO)MDP` instead of an `OpaqueRLEnv(PO)MDP`. - """) -end - -function infer_statetype(env) - try - return only(Base.return_types(RL.state, Tuple{typeof(env)})) - catch ex - @warn("""Unable to infer state type for $(typeof(env)) because of the following error: - - $(sprint(showerror, ex)) - - Falling back to Any. 
- """) - return Any - end -end - -function infer_obstype(env) - try - return only(Base.return_types(RL.observe, Tuple{typeof(env)})) - catch ex - @warn("""Unable to infer observation type for $(typeof(env)) because of the following error: - - $(sprint(showerror, ex)) - - Falling back to Any. - """) - return Any - end -end diff --git a/src/common_rl/to_env.jl b/src/common_rl/to_env.jl deleted file mode 100644 index 01a9e61..0000000 --- a/src/common_rl/to_env.jl +++ /dev/null @@ -1,100 +0,0 @@ -const RL = CommonRLInterface - -abstract type AbstractPOMDPsCommonRLEnv <: RL.AbstractEnv end - -RL.actions(env::AbstractPOMDPsCommonRLEnv) = actions(env.m) -RL.terminated(env::AbstractPOMDPsCommonRLEnv) = isterminal(env.m, env.s) - -mutable struct MDPCommonRLEnv{RLO, M<:MDP, S} <: AbstractPOMDPsCommonRLEnv - m::M - s::S -end - -""" - MDPCommonRLEnv(m, [s]) - MDPCommonRLEnv{RLO}(m, [s]) - -Create a CommonRLInterface environment from MDP m; optionally specify the state 's'. - -The `RLO` parameter can be used to specify a type to convert the observation to. By default, this is `AbstractArray`. Use `Any` to disable conversion. -""" -MDPCommonRLEnv{RLO}(m, s=rand(initialstate(m))) where {RLO} = MDPCommonRLEnv{RLO, typeof(m), statetype(m)}(m, s) -MDPCommonRLEnv(m, s=rand(initialstate(m))) = MDPCommonRLEnv{AbstractArray}(m, s) - -function RL.reset!(env::MDPCommonRLEnv) - env.s = rand(initialstate(env.m)) - return nothing -end - -function RL.act!(env::MDPCommonRLEnv, a) - sp, r = @gen(:sp, :r)(env.m, env.s, a) - env.s = sp - return r -end - -RL.observe(env::MDPCommonRLEnv{RLO}) where {RLO} = convert_s(RLO, env.s, env.m) - -RL.@provide RL.clone(env::MDPCommonRLEnv{RLO}) where {RLO} = MDPCommonRLEnv{RLO}(env.m, env.s) -RL.@provide RL.render(env::MDPCommonRLEnv) = render(env.m, (sp=env.s,)) -RL.@provide RL.state(env::MDPCommonRLEnv{RLO}) where {RLO} = convert_s(RLO, env.s, env.m) -RL.@provide RL.valid_actions(env::MDPCommonRLEnv) = actions(env.m, env.s) - -RL.observations(env::MDPCommonRLEnv{RLO}) where {RLO} = (convert_s(RLO, s, env.m) for s in states(env.m)) # should really be some kind of lazy map that handles uncountably infinite spaces -RL.provided(::typeof(RL.observations), ::Type{<:Tuple{MDPCommonRLEnv{<:Any, M, <:Any}}}) where {M} = static_hasmethod(states, Tuple{<:M}) - -RL.@provide function RL.setstate!(env::MDPCommonRLEnv{<:Any, <:Any, S}, s) where S - env.s = convert_s(S, s, env.m) - return nothing -end - -mutable struct POMDPCommonRLEnv{RLO, M<:POMDP, S, O} <: AbstractPOMDPsCommonRLEnv - m::M - s::S - o::O -end - -""" - POMDPCommonRLEnv(m, [s], [o]) - POMDPCommonRLEnv{RLO}(m, [s], [o]) - -Create a CommonRLInterface environment from POMDP m; optionally specify the state 's' and observation 'o'. - -The `RLO` parameter can be used to specify a type to convert the observation to. By default, this is `AbstractArray`. Use `Any` to disable conversion. 
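A minimal usage sketch of the wrapper documented above (this functionality now lives in POMDPTools.CommonRLIntegration). `TigerPOMDP` from POMDPModels is only a stand-in problem, and the `convert` method used is the one defined near the bottom of this file:

```julia
using POMDPs, POMDPModels, POMDPTools
import CommonRLInterface as RL

m   = TigerPOMDP()                        # stand-in POMDP
env = convert(RL.AbstractEnv, m)          # wraps m in a POMDPCommonRLEnv
RL.reset!(env)
r = RL.act!(env, first(RL.actions(env)))  # steps the underlying POMDP and returns the reward
o = RL.observe(env)                       # observation, converted to an AbstractArray by default
```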
-""" -POMDPCommonRLEnv{RLO}(m, s=rand(initialstate(m)), o=rand(initialobs(m, s))) where {RLO} = POMDPCommonRLEnv{RLO, typeof(m), statetype(m), obstype(m)}(m, s, o) -POMDPCommonRLEnv(m, s=rand(initialstate(m)), o=rand(initialobs(m, s))) = POMDPCommonRLEnv{AbstractArray}(m, s, o) - -function RL.reset!(env::POMDPCommonRLEnv) - env.s = rand(initialstate(env.m)) - env.o = rand(initialobs(env.m, env.s)) - return nothing -end - -function RL.act!(env::POMDPCommonRLEnv, a) - sp, o, r = @gen(:sp, :o, :r)(env.m, env.s, a) - env.s = sp - env.o = o - return r -end - -RL.observe(env::POMDPCommonRLEnv{RLO}) where {RLO} = convert_o(RLO, env.o, env.m) - -RL.@provide RL.clone(env::POMDPCommonRLEnv{RLO}) where {RLO} = POMDPCommonRLEnv{RLO}(env.m, env.s, env.o) -RL.@provide RL.render(env::POMDPCommonRLEnv) = render(env.m, (sp=env.s, o=env.o)) -RL.@provide RL.state(env::POMDPCommonRLEnv) = (env.s, env.o) -RL.@provide RL.valid_actions(env::POMDPCommonRLEnv) = actions(env.m, env.s) - -RL.observations(env::POMDPCommonRLEnv{RLO}) where {RLO} = (convert_o(RLO, o, env.m) for o in observations(env.m)) # should really be some kind of lazy map that handles uncountably infinite spaces -RL.provided(::typeof(RL.observations), ::Type{<:Tuple{POMDPCommonRLEnv{<:Any, M, <:Any, <:Any}}}) where {M} = static_hasmethod(observations, Tuple{<:M}) - -RL.@provide function RL.setstate!(env::POMDPCommonRLEnv, so) - env.s = first(so) - env.o = last(so) - return nothing -end - -Base.convert(::Type{RL.AbstractEnv}, m::POMDP) = POMDPCommonRLEnv(m) -Base.convert(::Type{RL.AbstractEnv}, m::MDP) = MDPCommonRLEnv(m) - -Base.convert(::Type{MDP}, env::MDPCommonRLEnv) = env.m -Base.convert(::Type{POMDP}, env::POMDPCommonRLEnv) = env.m diff --git a/src/convenient_implementations.jl b/src/convenient_implementations.jl index 1f592de..9e1ec40 100644 --- a/src/convenient_implementations.jl +++ b/src/convenient_implementations.jl @@ -1,10 +1,7 @@ # some implementations for convenience # maintained by Zach Sunberg -rand(rng::AbstractRNG, t::Tuple{Bool, Bool}) = rand(rng, Bool) -rand(t::Tuple{Bool, Bool}) = rand(Bool) - -support(s::AbstractVector) = s -support(s::Tuple) = s -support(r::AbstractRange) = r -support(g::Base.Generator) = g \ No newline at end of file +function POMDPs.support(c::Union{AbstractVector,Tuple,AbstractRange,Base.Generator}) + Base.depwarn("Use of $(typeof(c)) as a distribution is deprecated. Use POMDPTools.Uniform instead.", :support) + return c +end diff --git a/src/deprecated.jl b/src/deprecated.jl index 723ebbf..48aecd8 100644 --- a/src/deprecated.jl +++ b/src/deprecated.jl @@ -1,4 +1,4 @@ function add_infonode(ddn) - @warn "add_infonode(ddn::DDNStructure) is no longer supported in POMDPModelTools v0.3. It is not needed in POMDPs 0.9." + Base.depwarn("add_infonode(ddn::DDNStructure) is no longer supported in POMDPModelTools v0.3. It is not needed in POMDPs 0.9.", :add_infonode) return ddn end diff --git a/src/distributions/bool.jl b/src/distributions/bool.jl deleted file mode 100644 index 5114645..0000000 --- a/src/distributions/bool.jl +++ /dev/null @@ -1,28 +0,0 @@ -""" - BoolDistribution(p_true) - -Create a distribution over Boolean values (`true` or `false`). - -`p_true` is the probability of the `true` outcome; the probability of `false` is 1-`p_true`. -""" -struct BoolDistribution - p::Float64 # probability of true -end - -pdf(d::BoolDistribution, s::Bool) = s ? 
d.p : 1.0-d.p - -rand(rng::AbstractRNG, s::Random.SamplerTrivial{BoolDistribution}) = rand(rng) <= s[].p - -Base.iterate(d::BoolDistribution) = ((d.p, true), true) -function Base.iterate(d::BoolDistribution, state::Bool) - if state - return ((1.0 - d.p, false), false) - else - return nothing - end -end - -support(d::BoolDistribution) = [true, false] -Base.length(d::BoolDistribution) = 2 - -Base.show(io::IO, m::MIME"text/plain", d::BoolDistribution) = showdistribution(io, m, d, title="BoolDistribution") diff --git a/src/distributions/deterministic.jl b/src/distributions/deterministic.jl deleted file mode 100644 index d044397..0000000 --- a/src/distributions/deterministic.jl +++ /dev/null @@ -1,19 +0,0 @@ -""" - Deterministic(value) - -Create a deterministic distribution over only one value. - -This is intended to be used when a distribution is required, but the outcome is deterministic. It is equivalent to a Kronecker Delta distribution. -""" -struct Deterministic{T} - val::T -end - -rand(rng::AbstractRNG, s::Random.SamplerTrivial{<:Deterministic}) = s[].val -support(d::Deterministic) = (d.val,) -sampletype(::Type{Deterministic{T}}) where T = T -Random.gentype(::Type{Deterministic{T}}) where T = T -pdf(d::Deterministic, x) = convert(Float64, x == d.val) -mode(d::Deterministic) = d.val -mean(d::Deterministic{N}) where N<:Number = d.val / 1 # / 1 is to make this return a similar type to Statistics.mean -mean(d::Deterministic) = d.val # so that division need not be implemented for the value type diff --git a/src/distributions/implicit.jl b/src/distributions/implicit.jl deleted file mode 100644 index 1246f78..0000000 --- a/src/distributions/implicit.jl +++ /dev/null @@ -1,45 +0,0 @@ -""" - ImplicitDistribution(sample_function, args...) - -Define a distribution that can only be sampled from using `rand`, but has no explicit `pdf`. - -Each time `rand(rng, d::ImplicitDistribution)` is called, -```julia -sample_function(args..., rng) -``` -will be called to generate a new sample. - -`ImplicitDistribution` is designed to be used with anonymous functions or the `do` syntax as follows: - -# Examples - -```julia -ImplicitDistribution(rng->rand(rng)^2) -``` - -```julia -struct MyMDP <: MDP{Float64, Int} end - -function POMDPs.transition(m::MyMDP, s, a) - ImplicitDistribution(s, a) do s, a, rng - return s + a + 0.001*randn(rng) - end -end - -td = transition(MyMDP(), 1.0, 1) -rand(td) # will return a number near 2 -``` -""" -struct ImplicitDistribution{F, A} - f::F - args::A - - # internal constructor needed for single argument case - ImplicitDistribution(f, args...) = new{typeof(f), typeof(args)}(f, args) -end - - -function Base.rand(rng::AbstractRNG, s::Random.SamplerTrivial{<:ImplicitDistribution}) - d = s[] - d.f(d.args..., rng) -end diff --git a/src/distributions/pretty_printing.jl b/src/distributions/pretty_printing.jl deleted file mode 100644 index 0577f65..0000000 --- a/src/distributions/pretty_printing.jl +++ /dev/null @@ -1,36 +0,0 @@ -""" - showdistribution([io], [mime], d) - -Show a UnicodePlots.barplot representation of a distribution. - -# Keyword Arguments - -- `title::String=string(typeof(d))*" distribution"`: title for the barplot. -""" -function showdistribution(io::IO, mime::MIME"text/plain", d; title=string(typeof(d))*" distribution") - limited = get(io, :limit, false) - strings = String[] - probs = Float64[] - - rows = first(get(io, :displaysize, displaysize(io))) - rows -= 6 # Yuck! 
This magic number is also in Base.print_matrix - - if limited && rows > 1 && length(support(d)) >= rows - for (x,p) in Iterators.take(weighted_iterator(d), rows-1) - push!(strings, sprint(show, x)) # maybe this should have conext=:compact=>true - push!(probs, p) - end - - push!(strings, "") - push!(probs, 1.0-sum(probs)) - else - for (x,p) in weighted_iterator(d) - push!(strings, sprint(show, x)) - push!(probs, p) - end - end - show(io, mime, barplot(strings, probs, title=title)) -end - -showdistribution(io::IO, d; kwargs...) = showdistribution(io, MIME("text/plain"), d; kwargs...) -showdistribution(d; kwargs...) = showdistribution(stdout, d; kwargs...) diff --git a/src/distributions/sparse_cat.jl b/src/distributions/sparse_cat.jl deleted file mode 100644 index dec98b4..0000000 --- a/src/distributions/sparse_cat.jl +++ /dev/null @@ -1,129 +0,0 @@ -""" - SparseCat(values, probabilities) - -Create a sparse categorical distribution. - -`values` is an iterable object containing the possible values (can be of any type) in the distribution that have nonzero probability. `probabilities` is an iterable object that contains the associated probabilities. - -This is optimized for value iteration with a fast implementation of `weighted_iterator`. Both `pdf` and `rand` are order n. -""" -struct SparseCat{V, P} - vals::V - probs::P -end - -# handle cases where probs is an array of something other than numbers (issue #35) -function SparseCat(v, p::AbstractArray) - cp = try - convert(AbstractArray{Float64}, p) - catch - @error("Couldn't convert all probabilities to Float64 when creating a SparseCat distribution. Did you get the arguments in the right order?", values=v, probabilities=p) - rethrow() - end - SparseCat(v, cp) -end -# the method above gets all arrays *except* ones that have a numeric eltype, which are handled below -SparseCat(v, p::AbstractArray{<:Number}) = SparseCat{typeof(v), typeof(p)}(v, p) - -function rand(rng::AbstractRNG, s::Random.SamplerTrivial{<:SparseCat}) - d = s[] - r = sum(d.probs)*rand(rng) - tot = zero(eltype(d.probs)) - for (v, p) in d - tot += p - if r < tot - return v - end - end - if sum(d.probs) <= 0.0 - error(""" - Tried to sample from a SparseCat distribution with probabilities that sum to $(sum(d.probs)). 
- - vals = $(d.vals) - - probs = $(d.probs) - """) - end - error("Error sampling from SparseCat distribution with vals $(d.vals) and probs $(d.probs)") # try to help with type stability -end - -# slow linear search :( -function pdf(d::SparseCat, s) - for (v, p) in d - if v == s - return p - end - end - return zero(eltype(d.probs)) -end - -function pdf(d::SparseCat{V,P}, s) where {V<:AbstractArray, P<:AbstractArray} - for (i,v) in enumerate(d.vals) - if v == s - return d.probs[i] - end - end - return zero(eltype(d.probs)) -end - - - -support(d::SparseCat) = d.vals - -weighted_iterator(d::SparseCat) = d - -# iterator for general SparseCat -# this has some type stability problems -function Base.iterate(d::SparseCat) - val, vstate = iterate(d.vals) - prob, pstate = iterate(d.probs) - return ((val=>prob), (vstate, pstate)) -end -function Base.iterate(d::SparseCat, dstate::Tuple) - vstate, pstate = dstate - vnext = iterate(d.vals, vstate) - pnext = iterate(d.probs, pstate) - if vnext == nothing || pnext == nothing - return nothing - end - val, vstate_next = vnext - prob, pstate_next = pnext - return ((val=>prob), (vstate_next, pstate_next)) -end - -# iterator for SparseCat with indexed members -const Indexed = Union{AbstractArray, Tuple, NamedTuple} - -function Base.iterate(d::SparseCat{V,P}, state::Integer=1) where {V<:Indexed, P<:Indexed} - if state > length(d) - return nothing - end - return (d.vals[state]=>d.probs[state], state+1) -end - -Base.length(d::SparseCat) = min(length(d.vals), length(d.probs)) -Base.eltype(D::Type{SparseCat{V,P}}) where {V, P} = Pair{eltype(V), eltype(P)} -sampletype(D::Type{SparseCat{V,P}}) where {V, P} = eltype(V) -Random.gentype(D::Type{SparseCat{V,P}}) where {V, P} = eltype(V) - -function mean(d::SparseCat) - vsum = zero(eltype(d.vals)) - for (v, p) in d - vsum += v*p - end - return vsum/sum(d.probs) -end - -function mode(d::SparseCat) - bestp = zero(eltype(d.probs)) - bestv = first(d.vals) - for (v, p) in d - if p >= bestp - bestp = p - bestv = v - end - end - return bestv -end - -Base.show(io::IO, m::MIME"text/plain", d::SparseCat) = showdistribution(io, m, d, title="SparseCat distribution") diff --git a/src/distributions/uniform.jl b/src/distributions/uniform.jl deleted file mode 100644 index 44e464e..0000000 --- a/src/distributions/uniform.jl +++ /dev/null @@ -1,68 +0,0 @@ -struct Uniform{T<:AbstractSet} - set::T -end - -""" - Uniform(collection) - -Create a uniform categorical distribution over a collection of objects. - -The objects in the collection must be unique (this is tested on construction), and will be stored in a `Set`. To avoid this overhead, use `UnsafeUniform`. -""" -function Uniform(c) - set = Set(c) - if length(c) > length(set) - error(""" - Error constructing Uniform($c). - - Objects must be unique (that is, length(Set(c)) == length(c)). - """ - ) - end - return Uniform(set) -end - -support(d::Uniform) = d.set -sampletype(::Type{Uniform{T}}) where T = eltype(T) -Random.gentype(::Type{Uniform{T}}) where T = eltype(T) - -function pdf(d::Uniform, s) - if s in d.set - return 1.0/length(d.set) - else - return 0.0 - end -end - -Base.show(io::IO, m::MIME"text/plain", d::Uniform) = showdistribution(io, m, d, title="Uniform distribution") - -""" - UnsafeUniform(collection) - -Create a uniform categorical distribution over a collection of objects. - -No checks are performed to ensure uniqueness or check whether an object is actually in the set when evaluating the pdf. 
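A short sketch contrasting the two uniform distributions and the tradeoff described above (both are now provided by POMDPTools.POMDPDistributions):

```julia
using POMDPTools

d  = Uniform([:up, :down])        # values copied into a Set; uniqueness is checked
du = UnsafeUniform([:up, :down])  # no uniqueness or membership checks, lower overhead
rand(d)                           # :up or :down, each with probability 1/2
collect(weighted_iterator(du))    # [:up => 0.5, :down => 0.5]
```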
-""" -struct UnsafeUniform{T} - collection::T -end - -pdf(d::UnsafeUniform, s) = 1.0/length(d.collection) -support(d::UnsafeUniform) = d.collection -sampletype(::Type{UnsafeUniform{T}}) where T = eltype(T) -Random.gentype(::Type{UnsafeUniform{T}}) where T = eltype(T) - -# Common Implementations - -const Unif = Union{Uniform,UnsafeUniform} - -rand(rng::AbstractRNG, s::Random.SamplerTrivial{<:Unif}) = rand(rng, support(s[])) -mean(d::Unif) = mean(support(d)) -mode(d::Unif) = mode(support(d)) - -function weighted_iterator(d::Unif) - p = 1.0/length(support(d)) - return (x=>p for x in support(d)) -end - -Base.show(io::IO, m::MIME"text/plain", d::UnsafeUniform) = showdistribution(io, m, d, title="UnsafeUniform distribution") diff --git a/src/distributions/weighted_iteration.jl b/src/distributions/weighted_iteration.jl deleted file mode 100644 index 05c21b0..0000000 --- a/src/distributions/weighted_iteration.jl +++ /dev/null @@ -1,19 +0,0 @@ -""" - weighted_iterator(d) - -Return an iterator through pairs of the values and probabilities in distribution `d`. - -This is designed to speed up value iteration. Distributions are encouraged to provide a custom optimized implementation if possible. - -# Example -```julia-repl -julia> d = BoolDistribution(0.7) -BoolDistribution(0.7) - -julia> collect(weighted_iterator(d)) -2-element Array{Pair{Bool,Float64},1}: - true => 0.7 - false => 0.3 -``` -""" -weighted_iterator(d) = (x=>pdf(d, x) for x in support(d)) diff --git a/src/fully_observable_pomdp.jl b/src/fully_observable_pomdp.jl deleted file mode 100644 index d9a5cfb..0000000 --- a/src/fully_observable_pomdp.jl +++ /dev/null @@ -1,45 +0,0 @@ -""" - FullyObservablePOMDP(mdp) - -Turn `MDP` `mdp` into a `POMDP` where the observations are the states of the MDP. -""" -struct FullyObservablePOMDP{M,S,A} <: POMDP{S,A,S} - mdp::M -end - -function FullyObservablePOMDP(m::MDP) - return FullyObservablePOMDP{typeof(m), statetype(m), actiontype(m)}(m) -end - -mdptype(::Type{FullyObservablePOMDP{M,S,A}}) where {M,S,A} = M - -POMDPs.observations(pomdp::FullyObservablePOMDP) = states(pomdp.mdp) -POMDPs.obsindex(pomdp::FullyObservablePOMDP{S, A}, o::S) where {S, A} = stateindex(pomdp.mdp, o) - -POMDPs.convert_o(T::Type{V}, o, pomdp::FullyObservablePOMDP) where {V<:AbstractArray} = convert_s(T, o, pomdp.mdp) -POMDPs.convert_o(T::Type{S}, vec::V, pomdp::FullyObservablePOMDP) where {S,V<:AbstractArray} = convert_s(T, vec, pomdp.mdp) - -function POMDPs.observation(pomdp::FullyObservablePOMDP, a, sp) - return Deterministic(sp) -end - -function POMDPs.observation(pomdp::FullyObservablePOMDP, s, a, sp) - return Deterministic(sp) -end - -# inherit other function from the MDP type - -POMDPs.states(pomdp::FullyObservablePOMDP) = states(pomdp.mdp) -POMDPs.actions(pomdp::FullyObservablePOMDP) = actions(pomdp.mdp) -POMDPs.transition(pomdp::FullyObservablePOMDP, s, a) = transition(pomdp.mdp, s, a) -POMDPs.isterminal(pomdp::FullyObservablePOMDP, s) = isterminal(pomdp.mdp, s) -POMDPs.discount(pomdp::FullyObservablePOMDP) = discount(pomdp.mdp) -POMDPs.stateindex(pomdp::FullyObservablePOMDP, s) = stateindex(pomdp.mdp, s) -POMDPs.actionindex(pomdp::FullyObservablePOMDP, a) = actionindex(pomdp.mdp, a) -POMDPs.convert_s(T::Type{V}, s, pomdp::FullyObservablePOMDP) where V<:AbstractArray = convert_s(T, s, pomdp.mdp) -POMDPs.convert_s(T::Type{S}, vec::V, pomdp::FullyObservablePOMDP) where {S,V<:AbstractArray} = convert_s(T, vec, pomdp.mdp) -POMDPs.convert_a(T::Type{V}, a, pomdp::FullyObservablePOMDP) where V<:AbstractArray = convert_a(T, a, 
pomdp.mdp) -POMDPs.convert_a(T::Type{A}, vec::V, pomdp::FullyObservablePOMDP) where {A,V<:AbstractArray} = convert_a(T, vec, pomdp.mdp) -POMDPs.reward(pomdp::FullyObservablePOMDP, s, a) = reward(pomdp.mdp, s, a) -POMDPs.initialstate(m::FullyObservablePOMDP) = initialstate(m.mdp) -POMDPs.initialobs(m::FullyObservablePOMDP, s) = Deterministic(s) diff --git a/src/generative_belief_mdp.jl b/src/generative_belief_mdp.jl deleted file mode 100644 index e52b9ae..0000000 --- a/src/generative_belief_mdp.jl +++ /dev/null @@ -1,61 +0,0 @@ -""" - GenerativeBeliefMDP(pomdp, updater) - -Create a generative model of the belief MDP corresponding to POMDP `pomdp` with belief updates performed by `updater`. -""" -struct GenerativeBeliefMDP{P<:POMDP, U<:Updater, B, A} <: MDP{B, A} - pomdp::P - updater::U -end - -function GenerativeBeliefMDP(pomdp::P, up::U) where {P<:POMDP, U<:Updater} - # XXX hack to determine belief type - b0 = initialize_belief(up, initialstate(pomdp)) - GenerativeBeliefMDP{P, U, typeof(b0), actiontype(pomdp)}(pomdp, up) -end - -function POMDPs.gen(bmdp::GenerativeBeliefMDP, b, a, rng::AbstractRNG) - s = rand(rng, b) - if isterminal(bmdp.pomdp, s) - bp = gbmdp_handle_terminal(bmdp.pomdp, bmdp.updater, b, s, a, rng::AbstractRNG)::typeof(b) - return (sp=bp, r=0.0) - end - sp, o, r = @gen(:sp, :o, :r)(bmdp.pomdp, s, a, rng) # maybe this should have been generate_or? - bp = update(bmdp.updater, b, a, o) - return (sp=bp, r=r) -end - -actions(bmdp::GenerativeBeliefMDP{P,U,B,A}, b::B) where {P,U,B,A} = actions(bmdp.pomdp, b) -actions(bmdp::GenerativeBeliefMDP) = actions(bmdp.pomdp) - -isterminal(bmdp::GenerativeBeliefMDP, b) = all(isterminal(bmdp.pomdp, s) for s in support(b)) - -discount(bmdp::GenerativeBeliefMDP) = discount(bmdp.pomdp) - -const warned_about_gbmdp_terminal=false - -# override this if you want to handle it in a special way -function gbmdp_handle_terminal(pomdp::POMDP, updater::Updater, b, s, a, rng) - global warned_about_gbmdp_terminal - if !warned_about_gbmdp_terminal - @warn(""" - Sampled a terminal state for a GenerativeBeliefMDP transition - not sure how to proceed, but will try. - - See $(@__FILE__) and implement a new method of POMDPToolbox.gbmdp_handle_terminal if you want special behavior in this case. - - """) - warned_about_gbmdp_terminal = true - end - sp, o, r = @gen(:sp, :o, :r)(pomdp, s, a, rng) - bp = update(updater, b, a, o) - return bp -end - -function initialstate(bmdp::GenerativeBeliefMDP) - return Deterministic(initialize_belief(bmdp.updater, initialstate(bmdp.pomdp))) -end - -# deprecated in POMDPs v0.9 -function initialstate(bmdp::GenerativeBeliefMDP, rng::AbstractRNG) - return initialize_belief(bmdp.updater, initialstate(bmdp.pomdp)) -end diff --git a/src/info.jl b/src/info.jl deleted file mode 100644 index a204cd1..0000000 --- a/src/info.jl +++ /dev/null @@ -1,35 +0,0 @@ -# functions for passing out info from simulations, similar to the info return from openai gym -# maintained by @zsunberg - -""" - a, ai = action_info(policy, x) - -Return a tuple containing the action determined by policy 'p' at state or belief 'x' and information (usually a `NamedTuple`, `Dict` or `nothing`) from the calculation of that action. - -By default, returns `nothing` as info. -""" -function action_info(p::Policy, x) - return action(p, x), nothing -end - -""" - policy, si = solve_info(solver, problem) - -Return a tuple containing the policy determined by a solver and information (usually a `NamedTuple`, `Dict` or `nothing`) from the calculation of that policy. 
- -By default, returns `nothing` as info. -""" -function solve_info(s::Solver, problem::Union{POMDP,MDP}) - return solve(s, problem), nothing -end - -""" - bp, i = update_info(updater, b, a, o) - -Return a tuple containing the new belief and information (usually a `NamedTuple`, `Dict` or `nothing`) from the belief update. - -By default, returns `nothing` as info. -""" -function update_info(up::Updater, b, a, o) - return update(up, b, a, o), nothing -end diff --git a/src/matrices.jl b/src/matrices.jl deleted file mode 100644 index bf1285c..0000000 --- a/src/matrices.jl +++ /dev/null @@ -1,37 +0,0 @@ -""" - transition_matrices(m::Union{MDP,POMDP}) - transition_matrices(m; sparse=true) - -Construct transition matrices for (PO)MDP m. - -The returned object is an associative object (usually a Dict), where the keys are actions. Each value in this object is an AbstractMatrix where the row corresponds to the state index of s and the column corresponds to the state index of s'. The entry in the matrix is the probability of transitioning from state s to state s'. -""" -function transition_matrices(m::Union{MDP,POMDP}; sparse::Bool=true) - transmats = POMDPModelTools.transition_matrix_a_s_sp(m) - if !sparse - transmats = [convert(Matrix, t) for t in transmats] - end - mtype = typeof(first(transmats)) - oa = ordered_actions(m) - return Dict{actiontype(m), mtype}(oa[ai]=>transmats[ai] for ai in 1:length(actions(m))) -end - -""" - reward_vectors(m::Union{MDP, POMDP}) - -Construct reward vectors for (PO)MDP m. - -The returned object is an associative object (usually a Dict), where the keys are actions. Each value in this object is an AbstractVector where the index corresponds to the state index of s and the entry is the reward for that state. -""" -function reward_vectors(m::Union{MDP,POMDP}) - d = Dict{actiontype(m), Vector{Float64}}() - r = StateActionReward(m) - for a in actions(m) - rv = zeros(length(states(m))) - for s in states(m) - rv[stateindex(m, s)] = r(s, a) - end - d[a] = rv - end - return d -end diff --git a/src/obs_weight.jl b/src/obs_weight.jl deleted file mode 100644 index 7a2a4d0..0000000 --- a/src/obs_weight.jl +++ /dev/null @@ -1,11 +0,0 @@ -# obs_weight is a shortcut function for getting the relative likelihood of an observation without having to construct the observation distribution. Useful for particle filtering -# maintained by @zsunberg - -""" - obs_weight(pomdp, s, a, sp, o) - -Return a weight proportional to the likelihood of receiving observation o from state sp (and a and s if they are present). - -This is a useful shortcut for particle filtering so that the observation distribution does not have to be represented. -""" -obs_weight(p, s, a, sp, o) = pdf(observation(p, s, a, sp), o) diff --git a/src/ordered_spaces.jl b/src/ordered_spaces.jl deleted file mode 100644 index df759e4..0000000 --- a/src/ordered_spaces.jl +++ /dev/null @@ -1,83 +0,0 @@ -# these functions return vectors of states, actions and observations, ordered according to stateindex, actionindex, etc. - -""" - ordered_actions(mdp) - -Return an `AbstractVector` of actions ordered according to `actionindex(mdp, a)`. - -`ordered_actions(mdp)` will always return an `AbstractVector{A}` `v` containing all of the actions in `actions(mdp)` in the order such that `actionindex(mdp, v[i]) == i`. You may wish to override this for your problem for efficiency. 
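A brief check of the ordering contract just stated, i.e. that `actionindex` agrees with position in the returned vector. `SimpleGridWorld` from POMDPModels is only a stand-in discrete problem, and `ordered_actions` is now exported by POMDPTools:

```julia
using POMDPs, POMDPModels, POMDPTools

m  = SimpleGridWorld()
as = ordered_actions(m)
@assert all(actionindex(m, as[i]) == i for i in eachindex(as))
```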
-""" -ordered_actions(mdp::Union{MDP,POMDP}) = ordered_vector(actiontype(typeof(mdp)), a->actionindex(mdp,a), actions(mdp), "action") - -""" - ordered_states(mdp) - -Return an `AbstractVector` of states ordered according to `stateindex(mdp, a)`. - -`ordered_states(mdp)` will always return a `AbstractVector{A}` `v` containing all of the states in `states(mdp)` in the order such that `stateindex(mdp, v[i]) == i`. You may wish to override this for your problem for efficiency. -""" -ordered_states(mdp::Union{MDP,POMDP}) = ordered_vector(statetype(typeof(mdp)), s->stateindex(mdp,s), states(mdp), "state") - -""" - ordered_observations(pomdp) - -Return an `AbstractVector` of observations ordered according to `obsindex(pomdp, a)`. - -`ordered_observations(mdp)` will always return a `AbstractVector{A}` `v` containing all of the observations in `observations(pomdp)` in the order such that `obsindex(pomdp, v[i]) == i`. You may wish to override this for your problem for efficiency. -""" -ordered_observations(pomdp::POMDP) = ordered_vector(obstype(typeof(pomdp)), o->obsindex(pomdp,o), observations(pomdp), "observation") - -function ordered_vector(T::Type, index::Function, space, singular, plural=singular*"s") - len = length(space) - a = Array{T}(undef, len) - gotten = falses(len) - for x in space - id = index(x) - if id > len || id < 1 - error(""" - $(singular)index(...) returned an index that was out of bounds for $singular $x. - - index was $id. - - n_$plural(...) was $len. - """) - end - a[id] = x - gotten[id] = true - end - if !all(gotten) - missing = findall(.!gotten) - @warn """ - Problem creating an ordered vector of $plural in ordered_$plural(...). There is likely a mistake in $(singular)index(...) or n_$plural(...). - - n_$plural(...) was $len. - - $plural corresponding to the following indices were missing from $plural(...): $missing - """ - end - return a -end - -@POMDP_require ordered_actions(mdp::Union{MDP,POMDP}) begin - P = typeof(mdp) - @req actionindex(::P, ::actiontype(P)) - @req actions(::P) - as = actions(mdp) - @req length(::typeof(as)) -end - -@POMDP_require ordered_states(mdp::Union{MDP,POMDP}) begin - P = typeof(mdp) - @req stateindex(::P, ::statetype(P)) - @req states(::P) - ss = states(mdp) - @req length(::typeof(ss)) -end - -@POMDP_require ordered_observations(mdp::Union{MDP,POMDP}) begin - P = typeof(mdp) - @req obsindex(::P, ::obstype(P)) - @req observations(::P) - os = observations(mdp) - @req length(::typeof(os)) -end diff --git a/src/policy_evaluation.jl b/src/policy_evaluation.jl deleted file mode 100644 index dfe90db..0000000 --- a/src/policy_evaluation.jl +++ /dev/null @@ -1,84 +0,0 @@ -""" -Value function for a policy on an MDP. - -If `v` is a `DiscreteValueFunction`, access the value for a state with `v(s)` -""" -struct DiscreteValueFunction{M<:MDP} <: Function - m::M - values::Vector{Float64} -end - -(v::DiscreteValueFunction)(s) = v.values[stateindex(v.m, s)] - -""" - evaluate(m::MDP, p::Policy) - evaluate(m::MDP, p::Policy; rewardfunction=POMDPs.reward) - -Calculate the value for a policy on an MDP using the approach in equation 4.2.2 of Kochenderfer, *Decision Making Under Uncertainty*, 2015. - -Returns a DiscreteValueFunction, which maps states to values. 
- -# Example -``` -using POMDPModelTools, POMDPPolicies, POMDPModels -m = SimpleGridWorld() -u = evaluate(m, FunctionPolicy(x->:left)) -u([1,1]) # value of always moving left starting at state [1,1] -``` -""" -function evaluate(m::MDP, p::Policy; rewardfunction=POMDPs.reward) - t = policy_transition_matrix(m, p) - r = policy_reward_vector(m, p, rewardfunction=rewardfunction) - u = (I-discount(m)*t)\r - return DiscreteValueFunction(m, u) -end - -""" - policy_transition_matrix(m::Union{MDP, POMDP}, p::Policy) - -Create an |S|x|S| sparse transition matrix for a given policy. - -The row corresponds to the current state and column to the next state. Corresponds to ``T^π`` in equation (4.7) in Kochenderfer, *Decision Making Under Uncertainty*, 2015. -""" -function policy_transition_matrix(m::Union{MDP,POMDP}, p::Policy) - rows = Int[] - cols = Int[] - probs = Float64[] - state_space = states(m) - ns = length(state_space) - for s in state_space - if !isterminal(m, s) # if terminal, the transition probabilities are all just zero - si = stateindex(m, s) - a = action(p, s) - td = transition(m, s, a) - for (sp, p) in weighted_iterator(td) - if p > 0.0 - spi = stateindex(m, sp) - push!(rows, si) - push!(cols, spi) - push!(probs, p) - end - end - end - end - - return sparse(rows, cols, probs, ns, ns) -end - -function policy_reward_vector(m::Union{MDP,POMDP}, p::Policy; rewardfunction=POMDPs.reward) - state_space = states(m) - r = zeros(length(state_space)) - for s in state_space - if !isterminal(m, s) # if terminal, the transition probabilities are all just zero - si = stateindex(m, s) - a = action(p, s) - td = transition(m, s, a) - for (sp, p) in weighted_iterator(td) - if p > 0.0 - r[si] += p*rewardfunction(m, s, a, sp) - end - end - end - end - return r -end diff --git a/src/sparse_tabular.jl b/src/sparse_tabular.jl deleted file mode 100644 index 455e651..0000000 --- a/src/sparse_tabular.jl +++ /dev/null @@ -1,324 +0,0 @@ -""" - SparseTabularMDP - -An MDP object where states and actions are integers and the transition is represented by a list of sparse matrices. -This data structure can be useful to exploit in vectorized algorithm (e.g. see SparseValueIterationSolver). -The recommended way to access the transition and reward matrices is through the provided accessor functions: `transition_matrix` and `reward_vector`. - -# Fields -- `T::Vector{SparseMatrixCSC{Float64, Int64}}` The transition model is represented as a vector of sparse matrices (one for each action). `T[a][s, sp]` the probability of transition from `s` to `sp` taking action `a`. -- `R::Array{Float64, 2}` The reward is represented as a matrix where the rows are states and the columns actions: `R[s, a]` is the reward of taking action `a` in sate `s`. -- `terminal_states::Set{Int64}` Stores the terminal states -- `discount::Float64` The discount factor - -# Constructors - -- `SparseTabularMDP(mdp::MDP)` : One can provide the matrices to the default constructor or one can construct a `SparseTabularMDP` from any discrete state MDP defined using the explicit interface. -Note that constructing the transition and reward matrices requires to iterate over all the states and can take a while. -To learn more information about how to define an MDP with the explicit interface please visit https://juliapomdp.github.io/POMDPs.jl/latest/explicit/ . 
-- `SparseTabularMDP(smdp::SparseTabularMDP; transition, reward, discount)` : This constructor returns a new sparse MDP that is a copy of the original smdp except for the field specified by the keyword arguments. - -""" -struct SparseTabularMDP <: MDP{Int64, Int64} - T::Vector{SparseMatrixCSC{Float64, Int64}} # T[a][s, sp] - R::Array{Float64, 2} # R[s, a] - terminal_states::Set{Int64} - discount::Float64 -end - -function SparseTabularMDP(mdp::MDP) - T = transition_matrix_a_s_sp(mdp) - R = reward_s_a(mdp) - ts = terminal_states_set(mdp) - return SparseTabularMDP(T, R, ts, discount(mdp)) -end - -@POMDP_require SparseTabularMDP(mdp::MDP) begin - P = typeof(mdp) - S = statetype(P) - A = actiontype(P) - @req discount(::P) - @subreq ordered_states(mdp) - @subreq ordered_actions(mdp) - @req transition(::P,::S,::A) - @req reward(::P,::S,::A,::S) - @req stateindex(::P,::S) - @req actionindex(::P, ::A) - @req actions(::P, ::S) - as = actions(mdp) - ss = states(mdp) - @req length(::typeof(as)) - @req length(::typeof(ss)) - a = first(as) - s = first(ss) - dist = transition(mdp, s, a) - D = typeof(dist) - @req support(::D) - @req pdf(::D,::S) -end - -function SparseTabularMDP(mdp::SparseTabularMDP; - transition::Union{Nothing, Vector{SparseMatrixCSC{Float64, Int64}}} = nothing, - reward::Union{Nothing, Array{Float64, 2}} = nothing, - discount::Union{Nothing, Float64} = nothing, - terminal_states::Union{Nothing, Set{Int64}} = nothing) - T = transition != nothing ? transition : mdp.T - R = reward != nothing ? reward : mdp.R - d = discount != nothing ? discount : mdp.discount - ts = terminal_states != nothing ? terminal_states : mdp.terminal_states - return SparseTabularMDP(T, R, ts, d) -end - -""" - SparseTabularPOMDP - -A POMDP object where states and actions are integers and the transition and observation distributions are represented by lists of sparse matrices. -This data structure can be useful to exploit in vectorized algorithms to gain performance (e.g. see SparseValueIterationSolver). -The recommended way to access the transition, reward, and observation matrices is through the provided accessor functions: `transition_matrix`, `reward_vector`, `observation_matrix`. - -# Fields -- `T::Vector{SparseMatrixCSC{Float64, Int64}}` The transition model is represented as a vector of sparse matrices (one for each action). `T[a][s, sp]` the probability of transition from `s` to `sp` taking action `a`. -- `R::Array{Float64, 2}` The reward is represented as a matrix where the rows are states and the columns actions: `R[s, a]` is the reward of taking action `a` in sate `s`. -- `O::Vector{SparseMatrixCSC{Float64, Int64}}` The observation model is represented as a vector of sparse matrices (one for each action). `O[a][sp, o]` is the probability of observing `o` from state `sp` after having taken action `a`. -- `terminal_states::Set{Int64}` Stores the terminal states -- `discount::Float64` The discount factor - -# Constructors - -- `SparseTabularPOMDP(pomdp::POMDP)` : One can provide the matrices to the default constructor or one can construct a `SparseTabularPOMDP` from any discrete state MDP defined using the explicit interface. -Note that constructing the transition and reward matrices requires to iterate over all the states and can take a while. -To learn more information about how to define an MDP with the explicit interface please visit https://juliapomdp.github.io/POMDPs.jl/latest/explicit/ . 
-- `SparseTabularPOMDP(spomdp::SparseTabularMDP; transition, reward, observation, discount)` : This constructor returns a new sparse POMDP that is a copy of the original smdp except for the field specified by the keyword arguments. - -""" -struct SparseTabularPOMDP <: POMDP{Int64, Int64, Int64} - T::Vector{SparseMatrixCSC{Float64, Int64}} # T[a][s, sp] - R::Array{Float64, 2} # R[s,sp] - O::Vector{SparseMatrixCSC{Float64, Int64}} # O[a][sp, o] - terminal_states::Set{Int64} - discount::Float64 -end - -function SparseTabularPOMDP(pomdp::POMDP) - T = transition_matrix_a_s_sp(pomdp) - R = reward_s_a(pomdp) - O = observation_matrix_a_sp_o(pomdp) - ts = terminal_states_set(pomdp) - return SparseTabularPOMDP(T, R, O, ts, discount(pomdp)) -end - -@POMDP_require SparseTabularPOMDP(pomdp::POMDP) begin - P = typeof(pomdp) - S = statetype(P) - A = actiontype(P) - O = obstype(P) - @req discount(::P) - @subreq ordered_states(pomdp) - @subreq ordered_actions(pomdp) - @subreq ordered_observations(pomdp) - @req transition(::P,::S,::A) - @req reward(::P,::S,::A,::S) - @req observation(::P, ::A, ::S) - @req stateindex(::P,::S) - @req actionindex(::P, ::A) - @req actions(::P, ::S) - @req observations(::P) - @req obsindex(::P, ::O) - as = actions(pomdp) - ss = states(pomdp) - @req length(::typeof(as)) - @req length(::typeof(ss)) - a = first(as) - s = first(ss) - dist = transition(pomdp, s, a) - D = typeof(dist) - @req support(::D) - @req pdf(::D,::S) - odist = observation(pomdp, a, s) - OD = typeof(odist) - @req support(::OD) - @req pdf(::OD, ::O) -end - - -function SparseTabularPOMDP(pomdp::SparseTabularPOMDP; - transition::Union{Nothing, Vector{SparseMatrixCSC{Float64, Int64}}} = nothing, - reward::Union{Nothing, Array{Float64, 2}} = nothing, - observation::Union{Nothing, Vector{SparseMatrixCSC{Float64, Int64}}} = nothing, - discount::Union{Nothing, Float64} = nothing, - terminal_states::Union{Nothing, Set{Int64}} = nothing) - T = transition != nothing ? transition : pomdp.T - R = reward != nothing ? reward : pomdp.R - d = discount != nothing ? discount : pomdp.discount - O = observation != nothing ? transition : pomdp.O - ts = terminal_states != nothing ? 
terminal_states : pomdp.terminal_states - return SparseTabularPOMDP(T, R, O, ts, d) -end - -const SparseTabularProblem = Union{SparseTabularMDP, SparseTabularPOMDP} - - -function transition_matrix_a_s_sp(mdp::Union{MDP, POMDP}) - # Thanks to zach - na = length(actions(mdp)) - state_space = states(mdp) - ns = length(state_space) - transmat_row_A = [Int64[] for _ in 1:na] - transmat_col_A = [Int64[] for _ in 1:na] - transmat_data_A = [Float64[] for _ in 1:na] - - for s in state_space - si = stateindex(mdp, s) - for a in actions(mdp, s) - ai = actionindex(mdp, a) - if isterminal(mdp, s) # if terminal, there is a probability of 1 of staying in that state - push!(transmat_row_A[ai], si) - push!(transmat_col_A[ai], si) - push!(transmat_data_A[ai], 1.0) - else - td = transition(mdp, s, a) - for (sp, p) in weighted_iterator(td) - if p > 0.0 - spi = stateindex(mdp, sp) - push!(transmat_row_A[ai], si) - push!(transmat_col_A[ai], spi) - push!(transmat_data_A[ai], p) - end - end - end - end - end - transmats_A_S_S2 = [sparse(transmat_row_A[a], transmat_col_A[a], transmat_data_A[a], ns, ns) for a in 1:na] - # if an action is not valid from a state, the transition is 0.0 everywhere - # @assert all(all(sum(transmats_A_S_S2[a], dims=2) .≈ ones(ns)) for a in 1:na) "Transition probabilities must sum to 1" - return transmats_A_S_S2 -end - -function reward_s_a(mdp::Union{MDP, POMDP}) - state_space = states(mdp) - action_space = actions(mdp) - reward_S_A = fill(-Inf, (length(state_space), length(action_space))) # set reward for all actions to -Inf unless they are in actions(mdp, s) - for s in state_space - if isterminal(mdp, s) - reward_S_A[stateindex(mdp, s), :] .= 0.0 - else - for a in actions(mdp, s) - td = transition(mdp, s, a) - r = 0.0 - for (sp, p) in weighted_iterator(td) - if p > 0.0 - r += p*reward(mdp, s, a, sp) - end - end - reward_S_A[stateindex(mdp, s), actionindex(mdp, a)] = r - end - end - end - return reward_S_A -end - - -function terminal_states_set(mdp::Union{MDP, POMDP}) - ts = Set{Int64}() - for s in states(mdp) - if isterminal(mdp, s) - si = stateindex(mdp, s) - push!(ts, si) - end - end - return ts -end - -function observation_matrix_a_sp_o(pomdp::POMDP) - state_space, action_space, obs_space = states(pomdp), actions(pomdp), observations(pomdp) - na, ns, no = length(action_space), length(state_space), length(obs_space) - obsmat_row_A = [Int64[] for _ in 1:na] - obsmat_col_A = [Int64[] for _ in 1:na] - obsmat_data_A = [Float64[] for _ in 1:na] - - for sp in state_space - spi = stateindex(pomdp, sp) - for a in action_space - ai = actionindex(pomdp, a) - od = observation(pomdp, a, sp) - for (o, p) in weighted_iterator(od) - if p > 0.0 - oi = obsindex(pomdp, o) - push!(obsmat_row_A[ai], spi) - push!(obsmat_col_A[ai], oi) - push!(obsmat_data_A[ai], p) - end - end - end - end - obsmats_A_SP_O = [sparse(obsmat_row_A[a], obsmat_col_A[a], obsmat_data_A[a], ns, no) for a in 1:na] - @assert all(all(sum(obsmats_A_SP_O[a], dims=2) .≈ ones(ns)) for a in 1:na) "Observation probabilities must sum to 1" - return obsmats_A_SP_O -end - -# MDP and POMDP common methods - -POMDPs.states(p::SparseTabularProblem) = 1:size(p.T[1], 1) -POMDPs.actions(p::SparseTabularProblem) = 1:size(p.T, 1) -POMDPs.actions(p::SparseTabularProblem, s::Int64) = [a for a in actions(p) if sum(transition_matrix(p, a)) ≈ size(p.T[1], 1)] - -POMDPs.stateindex(::SparseTabularProblem, s::Int64) = s -POMDPs.actionindex(::SparseTabularProblem, a::Int64) = a - -POMDPs.discount(p::SparseTabularProblem) = p.discount - 
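To make the accessor pattern concrete, a minimal sketch of building a sparse tabular model from a discrete MDP and reading its matrices. `SimpleGridWorld` from POMDPModels is only a stand-in problem; the type and accessors are now exported by POMDPTools:

```julia
using POMDPs, POMDPModels, POMDPTools

m  = SimpleGridWorld()
sm = SparseTabularMDP(m)       # iterates over all states to build T[a][s, sp] and R[s, a]
a  = first(actions(sm))        # actions of a SparseTabularMDP are integer indices
T1 = transition_matrix(sm, a)  # sparse |S| x |S| matrix of Pr(sp | s, a)
r1 = reward_vector(sm, a)      # length-|S| vector of R(s, a)
```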
-POMDPs.transition(p::SparseTabularProblem, s::Int64, a::Int64) = SparseCat(findnz(p.T[a][s, :])...) # XXX not memory efficient - -POMDPs.reward(p::SparseTabularProblem, s::Int64, a::Int64) = p.R[s, a] - -POMDPs.isterminal(p::SparseTabularProblem, s::Int64) = s ∈ p.terminal_states - -""" - transition_matrix(p::SparseTabularProblem, a) -Accessor function for the transition model of a sparse tabular problem. -It returns a sparse matrix containing the transition probabilities when taking action a: T[s, sp] = Pr(sp | s, a). -""" -transition_matrix(p::SparseTabularProblem, a) = p.T[a] - -""" - transition_matrices(p::SparseTabularProblem) -Accessor function for the transition model of a sparse tabular problem. -It returns a list of sparse matrices for each action of the problem. -""" -transition_matrices(p::SparseTabularProblem) = p.T - -""" - reward_vector(p::SparseTabularProblem, a) -Accessor function for the reward function of a sparse tabular problem. -It returns a vector containing the reward for all the states when taking action a: R(s, a). -The length of the return vector is equal to the number of states. -""" -reward_vector(p::SparseTabularProblem, a) = view(p.R, :, a) - -""" - reward_matrix(p::SparseTabularProblem) -Accessor function for the reward matrix R[s, a] of a sparse tabular problem. -""" -reward_matrix(p::SparseTabularProblem) = p.R - -# POMDP only methods - -POMDPs.observations(p::SparseTabularPOMDP) = 1:size(p.O[1], 2) - -POMDPs.observation(p::SparseTabularPOMDP, a::Int64, sp::Int64) = SparseCat(findnz(p.O[a][sp, :])...) - -POMDPs.obsindex(p::SparseTabularPOMDP, o::Int64) = o - -""" - observation_matrix(p::SparseTabularPOMDP, a::Int64) -Accessor function for the observation model of a sparse tabular POMDP. -It returns a sparse matrix containing the observation probabilities when having taken action a: O[sp, o] = Pr(o | sp, a). -""" -observation_matrix(p::SparseTabularPOMDP, a::Int64) = p.O[a] - -""" - observation_matrices(p::SparseTabularPOMDP) -Accessor function for the observation model of a sparse tabular POMDP. -It returns a list of sparse matrices for each action of the problem. -""" -observation_matrices(p::SparseTabularPOMDP) = p.O diff --git a/src/state_action_reward.jl b/src/state_action_reward.jl deleted file mode 100644 index 3f3f85a..0000000 --- a/src/state_action_reward.jl +++ /dev/null @@ -1,102 +0,0 @@ -""" - StateActionReward(m::Union{MDP,POMDP}) - -Robustly create a reward function that depends only on the state and action. - -If `reward(m, s, a)` is implemented, that will be used, otherwise the mean of `reward(m, s, a, sp)` for MDPs or `reward(m, s, a, sp, o)` for POMDPs will be used. 
-
-# Example
-```jldoctest
-using POMDPs
-using POMDPModels
-using POMDPModelTools
-
-m = BabyPOMDP()
-
-rm = StateActionReward(m)
-
-rm(true, true)
-
-# output
-
--15.0
-```
-"""
-abstract type StateActionReward end
-
-function StateActionReward(m)
-    if hasmethod(reward, Tuple{typeof(m), statetype(m), actiontype(m)})
-        return FunctionSAR(m)
-    else
-        return LazyCachedSAR(m)
-    end
-end
-
-struct FunctionSAR{M} <: StateActionReward
-    m::M
-end
-
-function (sar::FunctionSAR)(s, a)
-    if isterminal(sar.m, s)
-        return 0.0
-    else
-        return reward(sar.m, s, a)
-    end
-end
-
-struct LazyCachedSAR{M} <: StateActionReward
-    m::M
-    cache::Matrix{Union{Missing,Float64}}
-end
-
-function LazyCachedSAR(m)
-    ns = length(states(m))
-    na = length(actions(m))
-    return LazyCachedSAR(m, Matrix{Union{Missing, Float64}}(missing, ns, na))
-end
-
-function (sar::LazyCachedSAR)(s, a)::Float64
-    si = stateindex(sar.m, s)
-    ai = actionindex(sar.m, a)
-    entry = sar.cache[si, ai]
-    if ismissing(entry)
-        r = mean_reward(sar.m, s, a)
-        sar.cache[si, ai] = r
-    else
-        r = entry
-    end
-    return r
-end
-
-function mean_reward(m::MDP, s, a)
-    if isterminal(m, s)
-        return 0.0
-    else
-        td = transition(m, s, a)
-        rsum = 0.0
-        wsum = 0.0
-        for (sp, w) in weighted_iterator(td)
-            rsum += w*reward(m, s, a, sp)
-            wsum += w
-        end
-        return rsum/wsum
-    end
-end
-
-function mean_reward(m::POMDP, s, a)
-    if isterminal(m, s)
-        return 0.0
-    else
-        td = transition(m, s, a)
-        rsum = 0.0
-        wsum = 0.0
-        for (sp, w) in weighted_iterator(td)
-            od = observation(m, s, a, sp)
-            for (o, ow) in weighted_iterator(od)
-                rsum += ow*w*reward(m, s, a, sp, o)
-                wsum += ow*w
-            end
-        end
-        return rsum/wsum
-    end
-end
diff --git a/src/terminal_state.jl b/src/terminal_state.jl
deleted file mode 100644
index 49e044a..0000000
--- a/src/terminal_state.jl
+++ /dev/null
@@ -1,20 +0,0 @@
-"""
-    TerminalState
-
-A type with no fields whose singleton instance `terminalstate` is used to represent a terminal state with no additional information.
-
-This type has the appropriate promotion logic implemented to function like `Missing` when added to arrays, etc.
-
-Note that terminal states NEED NOT be of type `TerminalState`. You can define any state to be terminal by implementing the appropriate `isterminal` method. Solvers and simulators SHOULD NOT check for this type, but should instead check using `isterminal`.
-"""
-struct TerminalState end
-
-"""
-    terminalstate
-
-The singleton instance of type `TerminalState` representing a terminal state.
-"""
-const terminalstate = TerminalState()
-
-isterminal(m::Union{MDP,POMDP}, ts::TerminalState) = true
-Base.promote_rule(::Type{TerminalState}, T::Type) = Union{TerminalState, T}
diff --git a/src/underlying_mdp.jl b/src/underlying_mdp.jl
deleted file mode 100644
index 75579b7..0000000
--- a/src/underlying_mdp.jl
+++ /dev/null
@@ -1,40 +0,0 @@
-"""
-    UnderlyingMDP(m::POMDP)
-
-Transform `POMDP` `m` into an `MDP` where the states are fully observed.
-
-    UnderlyingMDP(m::MDP)
-
-Return `m`
-"""
-struct UnderlyingMDP{P <: POMDP, S, A} <: MDP{S, A}
-    pomdp::P
-end
-
-function UnderlyingMDP(pomdp::POMDP{S, A, O}) where {S,A,O}
-    P = typeof(pomdp)
-    return UnderlyingMDP{P,S,A}(pomdp)
-end
-
-UnderlyingMDP(m::MDP) = m
-
-POMDPs.transition(mdp::UnderlyingMDP{P, S, A}, s::S, a::A) where {P,S,A} = transition(mdp.pomdp, s, a)
-POMDPs.initialstate(m::UnderlyingMDP) = initialstate(m.pomdp)
-POMDPs.states(mdp::UnderlyingMDP) = states(mdp.pomdp)
-POMDPs.actions(mdp::UnderlyingMDP) = actions(mdp.pomdp)
-POMDPs.reward(mdp::UnderlyingMDP{P, S, A}, s::S, a::A) where {P,S,A} = reward(mdp.pomdp, s, a)
-POMDPs.reward(mdp::UnderlyingMDP{P, S, A}, s::S, a::A, sp::S) where {P,S,A} = reward(mdp.pomdp, s, a, sp)
-POMDPs.isterminal(mdp::UnderlyingMDP{P, S, A}, s::S) where {P,S,A} = isterminal(mdp.pomdp, s)
-POMDPs.discount(mdp::UnderlyingMDP) = discount(mdp.pomdp)
-POMDPs.stateindex(mdp::UnderlyingMDP{P, S, A}, s::S) where {P,S,A} = stateindex(mdp.pomdp, s)
-POMDPs.stateindex(mdp::UnderlyingMDP{P, Int, A}, s::Int) where {P,A} = stateindex(mdp.pomdp, s) # fix ambiguity with src/convenience
-POMDPs.stateindex(mdp::UnderlyingMDP{P, Bool, A}, s::Bool) where {P,A} = stateindex(mdp.pomdp, s)
-POMDPs.actionindex(mdp::UnderlyingMDP{P, S, A}, a::A) where {P,S,A} = actionindex(mdp.pomdp, a)
-POMDPs.actionindex(mdp::UnderlyingMDP{P,S, Int}, a::Int) where {P,S} = actionindex(mdp.pomdp, a)
-POMDPs.actionindex(mdp::UnderlyingMDP{P,S, Bool}, a::Bool) where {P,S} = actionindex(mdp.pomdp, a)
-
-POMDPs.gen(mdp::UnderlyingMDP, s, a, rng) = gen(mdp.pomdp, s, a, rng)
-
-# deprecated in POMDPs.jl v0.9
-POMDPs.initialstate_distribution(mdp::UnderlyingMDP) = initialstate_distribution(mdp.pomdp)
-POMDPs.initialstate(mdp::UnderlyingMDP, rng::AbstractRNG) = initialstate(mdp.pomdp, rng)
diff --git a/src/visualization.jl b/src/visualization.jl
deleted file mode 100644
index 9321877..0000000
--- a/src/visualization.jl
+++ /dev/null
@@ -1,50 +0,0 @@
-"""
-    render(m::Union{MDP,POMDP}, step::NamedTuple)
-
-Return a renderable representation of the step in problem `m`.
-
-The renderable representation may be anything that has `show(io, mime, x)`
-methods. It could be a plot, svg, Compose.jl context, Cairo context, or image.
-
-# Arguments
-`step` is a `NamedTuple` that contains the states, action, etc. corresponding
-to one transition in a simulation. It may have the following fields:
-- `t`: the time step index
-- `s`: the state at the beginning of the step
-- `a`: the action
-- `sp`: the state at the end of the step (s')
-- `r`: the reward for the step
-- `o`: the observation
-- `b`: the belief at the beginning of the step
-- `bp`: the belief at the end of the step
-- `i`: info from the model when the state transition was calculated
-- `ai`: info from the policy decision
-- `ui`: info from the belief update
-
-Keyword arguments are reserved for the problem implementer and can be used to control appearance, etc.
-
-# Important Notes
-- `step` may not contain all of the elements listed above, so `render` should check for them and render only what is available
-- `o` typically corresponds to `sp`, so it is often clearer for POMDPs to render `sp` rather than `s`.
-"""
-function render(m::Union{MDP,POMDP}, step)
-    @warn("No implementation of POMDPModelTools.render(m::$m, step) found. Falling back to text default.", maxlog=1)
-    io = IOBuffer()
-    ioc = IOContext(io, :short=>true)
-    try
-        for (k, v) in pairs(step)
-            print(ioc, k)
-            print(ioc, ": ")
-            show(ioc, v)
-            println(ioc)
-        end
-    finally
-        println(ioc, """
-
-              Please implement POMDPModelTools.render(m::$(typeof(m)), step) to enable visualization.
-              """)
-    end
-    return String(take!(io))
-end
-
-render(m::Union{MDP, POMDP}) = render(m, NamedTuple())
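For readers skimming the deleted sparse tabular code above, the following is a minimal sketch of how that representation is typically constructed and queried. It assumes `SimpleGridWorld` from POMDPModels as a stand-in problem and that the same names (`SparseTabularMDP`, `transition_matrix`, `reward_matrix`) are now provided through POMDPTools, which this package re-exports after this change.

```julia
using POMDPs
using POMDPTools    # SparseTabularMDP and its accessors are expected here after the deprecation
using POMDPModels   # assumed available; provides SimpleGridWorld

mdp = SimpleGridWorld()
stm = SparseTabularMDP(mdp)   # builds T, R, and terminal_states with helpers like transition_matrix_a_s_sp

a = 1                              # actions and states are plain integer indices
T_a = transition_matrix(stm, a)    # sparse ns×ns matrix with T_a[s, sp] = Pr(sp | s, a)
R = reward_matrix(stm)             # dense ns×na matrix with R[s, a]

@assert states(stm) == 1:size(T_a, 1)
```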
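The `StateActionReward` doctest above already shows the basic call; the sketch below just spells out what the wrapper does, again using `BabyPOMDP` from POMDPModels and the POMDPTools names.

```julia
using POMDPs
using POMDPTools    # StateActionReward is expected here after the deprecation
using POMDPModels   # assumed available; provides BabyPOMDP

m = BabyPOMDP()
rm = StateActionReward(m)   # FunctionSAR or LazyCachedSAR, depending on which reward methods m defines

# either way, rm(s, a) is the (mean) reward for taking action a in state s;
# for a hungry baby that is fed, r_hungry + r_feed = -10 + -5
@assert rm(true, true) == -15.0
```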
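A short sketch of how `terminalstate` is meant to be used when writing a model. The `CountdownMDP` type below is purely illustrative and is not part of the package.

```julia
using POMDPs
using POMDPTools    # terminalstate / TerminalState are expected here after the deprecation

# hypothetical toy problem: count down from 3, then stop
struct CountdownMDP <: MDP{Union{Int, TerminalState}, Symbol} end

POMDPs.actions(::CountdownMDP) = (:down,)
POMDPs.discount(::CountdownMDP) = 1.0
POMDPs.initialstate(::CountdownMDP) = Deterministic(3)
POMDPs.reward(::CountdownMDP, s, a, sp) = -1.0
POMDPs.transition(::CountdownMDP, s::Int, a) =
    Deterministic(s > 1 ? s - 1 : terminalstate)   # the next state is either an Int or the terminal singleton

m = CountdownMDP()
@assert isterminal(m, terminalstate)   # true via the fallback isterminal method shown above
```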
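The `UnderlyingMDP` wrapper above is mostly used to run MDP machinery on the fully observed state process of a POMDP. A minimal sketch, assuming `BabyPOMDP` from POMDPModels and the simulation tools that POMDPTools bundles:

```julia
using POMDPs
using POMDPTools    # UnderlyingMDP, RandomPolicy, RolloutSimulator are expected here
using POMDPModels   # assumed available; provides BabyPOMDP

pomdp = BabyPOMDP()
mdp = UnderlyingMDP(pomdp)   # same states, actions, rewards, and dynamics, but fully observed

# any MDP policy or simulator can now be applied directly to the wrapped problem
policy = RandomPolicy(mdp)
r = simulate(RolloutSimulator(max_steps=10), mdp, policy)
```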
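Finally, a sketch of how a problem author is expected to specialize `render` rather than rely on the text fallback above. The `MyProblem` type and its plain-text output are illustrative assumptions only; after the deprecation, the method to extend is the one re-exported from POMDPTools.

```julia
using POMDPs
using POMDPTools
import POMDPTools: render   # bring the function into scope so a new method can be added

# hypothetical problem type standing in for a user's own MDP or POMDP
struct MyProblem <: MDP{Tuple{Int,Int}, Symbol} end

# return anything with suitable show methods; here, a plain String
function render(m::MyProblem, step)
    io = IOBuffer()
    haskey(step, :s) && println(io, "agent at ", step.s)
    haskey(step, :a) && println(io, "moving ", step.a)
    haskey(step, :r) && println(io, "reward ", step.r)
    return String(take!(io))
end

render(MyProblem(), (s=(1, 2), a=:up))   # "agent at (1, 2)\nmoving up\n"
```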