smdp.md

Example of how one could abstract away high-level options to leverage MDP solvers. For context, an option can be thought of as a tuple which includes:

- an initiation set, i.e., where you can start executing the option,
- a policy, i.e., which primitive actions are chosen while the option is active,
- and state (and maybe also action) dependent termination probabilities, i.e., when you stop following the currently active option.

The advantage of such an abstraction is that you can simply treat the options as regular actions and solve the high-level problem as you would any MDP, provided the solver supports variable discounts. The generative interface offers a simple way to combine an MDP with an arbitrary set of options, provided the solver doesn't assume constant discounts.

"""
    OptionSMDP{S, A, OPT} <: MDP{S, OPT}

High-level problem that wraps an underlying `MDP{S, A}` and is itself an `MDP{S, OPT}`:
it keeps the state type `S` of the wrapped problem but uses the option type `OPT` as its
action type.

`OPT` does not appear in any field, so it has to be supplied explicitly when
constructing an instance, e.g. `OptionSMDP{S, A, OPT}(mdp)`.
"""
struct OptionSMDP{S, A, OPT} <: MDP{S, OPT}
    # The wrapped problem on which primitive actions are simulated.
    # NOTE(review): the field type `MDP{S, A}` is abstract, so accesses to `mdp` are not
    # concretely typed; consider an extra type parameter if performance matters.
    mdp::MDP{S, A}
end

"""
    Option <: Policy

Abstract supertype for options. Because it subtypes `Policy`, an option can be handed to
anything that expects a policy. Concrete subtypes are expected to provide methods for
`action(option, s)`, `actions(option)` and `terminate(option, s, rng)`.
"""
abstract type Option <: Policy end

"""
    action(option::Option, s)

Interface stub: the primitive action `option` selects in state `s`. This fallback does
nothing and returns `nothing`; concrete option types should add their own method.
"""
action(option::Option, s) = nothing
"""
    actions(option::Option)

Interface stub taking only the option. This fallback does nothing and returns `nothing`;
concrete option types should add their own method.
"""
actions(option::Option) = nothing
"""
    terminate(option::Option, s, rng)

Interface stub: whether `option` stops executing in state `s`, with `rng` available for
a random draw. Callers use the result as an `if` condition, so concrete methods should
return a `Bool`. This fallback does nothing and returns `nothing`.
"""
terminate(option::Option, s, rng) = nothing

"""
    POMDPs.gen(om::OptionSMDP, s, option, rng)

Run `option` on the underlying MDP starting from `s` until it terminates, and summarise
the whole execution as a single high-level transition.

# Arguments
- `om`: the option-level problem; primitive steps are simulated on `om.mdp`.
- `s`: state of the underlying MDP in which the option starts executing.
- `option`: the option to execute; it is used as the policy given to `stepthrough` and
  is queried through `terminate`.
- `rng`: random number generator forwarded to `stepthrough` and `terminate`.

# Returns
A named tuple `(sp, r, disc)`:
- `sp`: the state in which execution stopped (equal to `s` if no step was taken),
- `r`: the sum of primitive rewards, each weighted by the discount accumulated so far,
- `disc`: `discount(om.mdp)^k`, where `k` is the number of primitive steps taken.
"""
function POMDPs.gen(om::OptionSMDP, s, option, rng)
    step_discount = discount(om.mdp)
    cumr = 0.0
    disc = 1.0
    # Defined before the loop: `for` iteration variables are local to the loop body, so
    # the resulting state has to be tracked in a variable that outlives it. Starting
    # from `s` also covers the case where the simulator yields no step at all.
    sp = s
    # The rollout must start from the state passed in, hence `s` is given explicitly as
    # the initial state; the loop variables are named so that they do not shadow it.
    for (_, _, r, next_state) in stepthrough(om.mdp, option, s, "s,a,r,sp", rng=rng)
        cumr += r * disc
        disc *= step_discount
        sp = next_state
        # Termination is tested on the state just reached, i.e. the state in which the
        # option would otherwise pick its next action.
        if terminate(option, sp, rng)
            break
        end
    end
    return (sp=sp, r=cumr, disc=disc)
end

Alternatively, you could augment the underlying MDP to keep track of options. This would allow you to simulate the MDP as you would normally but doesn't provide you much in the way of abstraction.

# Augmented state: a named tuple pairing an underlying state (`state`, of type `S`) with
# an option (`option`, of type `O`). Instances can be written as `(state=s, option=o)`.
const StateOption = NamedTuple{(:state, :option), Tuple{S, O}} where {S, O}
# Augmented action: a named tuple pairing a primitive action (`action`, of type `A`) with
# an option (`option`, of type `O`). Instances can be written as `(action=a, option=o)`.
const ActionOption = NamedTuple{(:action, :option), Tuple{A, O}} where {A, O}

"""
    OptionMDP{S, A, O} <: MDP{StateOption{S, O}, ActionOption{A, O}}

Augmented problem that wraps an underlying `MDP{S, A}`. Its states are
`StateOption{S, O}` and its actions are `ActionOption{A, O}`, so both carry an option of
type `O` alongside the underlying state or primitive action.

`O` does not appear in any field, so it has to be supplied explicitly when constructing
an instance, e.g. `OptionMDP{S, A, O}(mdp)`.
"""
struct OptionMDP{S, A, O} <: MDP{StateOption{S, O}, ActionOption{A, O}}
    # The wrapped problem that generates the underlying state transitions.
    # NOTE(review): the field type `MDP{S, A}` is abstract, so accesses to `mdp` are not
    # concretely typed; consider an extra type parameter if performance matters.
    mdp::MDP{S, A}
end

"""
    OptionPolicy <: Policy

Abstract supertype for policies that act on option-augmented states. The
`action(policy::OptionPolicy, s::StateOption, rng)` method calls `option(policy, state)`
to pick a new option, so concrete subtypes are expected to provide that method.
"""
abstract type OptionPolicy <: Policy end

"""
    action(policy::OptionPolicy, s::StateOption, rng::AbstractRNG)

Choose the augmented action for the augmented state `s`.

The option stored in `s.option` is kept unless `terminate` reports that it ends in
`s.state`; in that case a new option is obtained from `option(policy, s.state)`. The
selected option then picks the primitive action for `s.state`.

# Arguments
- `policy`: the high-level policy used to pick a new option when the current one ends.
- `s`: augmented state holding the underlying state and the currently active option.
- `rng`: random number generator passed to `terminate`.

# Returns
A named tuple `(action, option)` holding the primitive action and the option that chose
it, i.e. a value of the `ActionOption` family.
"""
function action(policy::OptionPolicy, s::StateOption, rng::AbstractRNG)
    o = s.option
    if terminate(o, s.state, rng)
        o = option(policy, s.state)
    end
    # `ActionOption` is an alias for a `NamedTuple` type, which has no two-positional-
    # argument constructor, so the result is built as a named tuple literal instead.
    return (action=action(o, s.state), option=o)
end

"""
    POMDPs.gen(::DDNNode{:sp}, m::OptionMDP{S, A, O}, s::StateOption{S, O}, a::ActionOption{A, O})

Generate the next augmented state of `m`.

The underlying next state is obtained by delegating to the wrapped problem `m.mdp` with
the underlying state `s.state` and the primitive action `a.action`. The option carried
by the action, `a.option`, becomes the option of the returned state.

# Returns
A `StateOption{S, O}` holding the underlying next state and `a.option`.
"""
function POMDPs.gen(
    ::DDNNode{:sp},
    m::OptionMDP{S, A, O},
    s::StateOption{S, O},
    a::ActionOption{A, O},
) where {S, A, O}
    # NOTE(review): no random number generator is accepted here or forwarded to the
    # wrapped problem; confirm this matches the `gen` signature the targeted POMDPs.jl
    # version dispatches on.
    sp = POMDPs.gen(DDNNode(:sp), m.mdp, s.state, a.action)
    # A `NamedTuple` type is constructed from a single tuple of values, not from
    # separate positional arguments.
    return StateOption{S, O}((sp, a.option))
end

Ideally we would be able to support both of these under a single type, but whether that is feasible without adding unwanted complexity is not clear. In any case, I think that all that is really needed to permit these kinds of extensions elegantly would be to permit variable discounts.

gehring/smdp.md