Class sdm::SolvableByMDP

The base class for problems formulated as Markov Decision Processes (MDPs) and solvable by HSVI-style algorithms.

  • #include <solvable_by_mdp.hpp>

Inherits the following classes: sdm::SolvableByHSVI

Inherited by the following classes: sdm::BaseBeliefMDP

Public Functions

Type Name
SolvableByMDP ()
Default constructor.
SolvableByMDP (const std::shared_ptr< MDPInterface > & mdp)
Construct a problem solvable by HSVI.
virtual double do_excess (double incumbent, double lb_value, double ub_value, double cost_so_far, double error, number horizon)
Compute the excess, as defined in the HSVI paper; it is used in the termination condition.
virtual std::shared_ptr< Space > getActionSpaceAt (const std::shared_ptr< State > & state, number t=0)
Get the action space at a specific state and timestep. The state dependency is required when the game forbids the use of some actions in that state. It is also used in some reformulated problems where actions are decision rules. The time dependency is required in extensive-form games in which some agents have a different action space.
virtual double getDiscount (number t=0) const
Get the specific discount factor for the problem at hand.
virtual double getExpectedNextValue (const std::shared_ptr< ValueFunction > & value_function, const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, number t=0)
Get the expected next value.
virtual std::shared_ptr< State > getInitialState ()
Get the initial state.
virtual Pair< std::shared_ptr< State >, double > getNextState (const std::shared_ptr< ValueFunction > & value_function, const std::shared_ptr< State > & belief, const std::shared_ptr< Action > & action, const std::shared_ptr< Observation > & observation, number t)
virtual std::shared_ptr< Space > getObservationSpaceAt (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, number t)
virtual double getReward (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, number t=0)
Get the reward for executing a specific action in a specific state at timestep t. The time dependency can be required in non-stationary problems.
virtual const std::shared_ptr< MDPInterface > & getUnderlyingProblem () const
Get the well-defined underlying problem. Some problems are solvable by DP algorithms even if they are not well defined; usually, they are simply reformulations of an underlying well-defined problem. For instance, the underlying DecPOMDP of the OccupancyMDP or the underlying POMDP of the current BeliefMDP.
virtual double getWeightedDiscount (number t)
Get the specific weighted discount factor for the problem at hand.
virtual bool isSerialized () const
Check if the problem is serialized.
virtual std::shared_ptr< State > nextState (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, number t=0, const std::shared_ptr< HSVI > & hsvi=nullptr)
Select the next state.
virtual Pair< std::shared_ptr< Action >, double > selectNextAction (const std::shared_ptr< ValueFunction > & lb, const std::shared_ptr< ValueFunction > & ub, const std::shared_ptr< State > & s, number h)
Select the next action.
virtual void setInitialState (const std::shared_ptr< State > & state)
Set the initial state.

Public Functions inherited from sdm::SolvableByHSVI

See sdm::SolvableByHSVI

Type Name
virtual double do_excess (double incumbent, double lb_value, double ub_value, double cost_so_far, double error, number t) = 0
Compute the excess, as defined in the HSVI paper; it is used in the termination condition.
virtual std::shared_ptr< Space > getActionSpaceAt (const std::shared_ptr< State > & state, number t) = 0
Get the action space at a specific state and timestep. The state dependency is required when the game forbids the use of some actions in that state. It is also used in some reformulated problems where actions are decision rules. The time dependency is required in extensive-form games in which some agents have a different action space.
virtual double getDiscount (number t) const = 0
Get the specific discount factor for the problem at hand.
virtual double getExpectedNextValue (const std::shared_ptr< ValueFunction > & value_function, const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, number t) = 0
Get the expected next value.
virtual std::shared_ptr< State > getInitialState () = 0
Get the initial state.
virtual Pair< std::shared_ptr< State >, double > getNextState (const std::shared_ptr< ValueFunction > & value_function, const std::shared_ptr< State > & belief, const std::shared_ptr< Action > & action, const std::shared_ptr< Observation > & observation, number t) = 0
virtual std::shared_ptr< Space > getObservationSpaceAt (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, number t) = 0
virtual double getReward (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, number t) = 0
Get the reward for executing a specific action in a specific state at timestep t. The time dependency can be required in non-stationary problems.
virtual const std::shared_ptr< MDPInterface > & getUnderlyingProblem () const = 0
Get the well-defined underlying problem. Some problems are solvable by DP algorithms even if they are not well defined; usually, they are simply reformulations of an underlying well-defined problem. For instance, the underlying DecPOMDP of the OccupancyMDP or the underlying POMDP of the current BeliefMDP.
virtual double getWeightedDiscount (number t) = 0
Get the specific weighted discount factor for the problem at hand.
virtual bool isSerialized () const = 0
Check if the problem is serialized.
virtual std::shared_ptr< State > nextState (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, number t=0, const std::shared_ptr< HSVI > & hsvi=nullptr) = 0
Select the next state.
virtual Pair< std::shared_ptr< Action >, double > selectNextAction (const std::shared_ptr< ValueFunction > & lb, const std::shared_ptr< ValueFunction > & ub, const std::shared_ptr< State > & state, number t) = 0
Select the next action.
virtual void setInitialState (const std::shared_ptr< State > &) = 0
Set the initial state.
virtual ~SolvableByHSVI ()

Protected Attributes

Type Name
std::shared_ptr< State > initial_state_
The initial state.
std::shared_ptr< MDPInterface > underlying_problem_
The underlying well defined problem.

Protected Functions

Type Name
const std::shared_ptr< MDPInterface > & getUnderlyingMDP () const
Get the underlying mdp.

Public Functions Documentation

function SolvableByMDP [1/2]

sdm::SolvableByMDP::SolvableByMDP () 

function SolvableByMDP [2/2]

sdm::SolvableByMDP::SolvableByMDP (
    const std::shared_ptr< MDPInterface > & mdp
) 

Parameters:

  • mdp the underlying MDP
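
Example (a minimal construction sketch; how the underlying MDPInterface instance is obtained, e.g. from a problem file parser, is outside the scope of this page and left here as an assumption):

    #include <solvable_by_mdp.hpp>

    // Hypothetical setup: `mdp` is any object implementing sdm::MDPInterface,
    // obtained elsewhere (e.g. from a problem file parser).
    std::shared_ptr<sdm::MDPInterface> mdp = /* ... */;

    // Wrap the MDP so it can be handed to an HSVI-style solver.
    auto solvable = std::make_shared<sdm::SolvableByMDP>(mdp);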

function do_excess

virtual double sdm::SolvableByMDP::do_excess (
    double incumbent,
    double lb_value,
    double ub_value,
    double cost_so_far,
    double error,
    number horizon
) 

Parameters:

  • incumbent the incumbent value (the best solution value found so far)
  • lb_value the lower-bound value
  • ub_value the upper-bound value
  • cost_so_far the cost accumulated so far
  • error the error threshold
  • horizon the horizon (timestep)

Returns:

the excess value

Implements sdm::SolvableByHSVI::do_excess
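
For intuition, a plausible body for this termination test, following the excess criterion of the HSVI paper (a sketch under that assumption, not necessarily the library's exact implementation):

    // Excess at depth `horizon`: the width of the bounds minus the error
    // tolerance rescaled by the weighted discount (typically gamma^horizon).
    // The node can be pruned once the excess is non-positive.
    // (`incumbent` and `cost_so_far` are unused in this simple MDP variant.)
    double do_excess(double incumbent, double lb_value, double ub_value,
                     double cost_so_far, double error, number horizon)
    {
        return (ub_value - lb_value) - error / this->getWeightedDiscount(horizon);
    }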

function getActionSpaceAt

virtual std::shared_ptr< Space > sdm::SolvableByMDP::getActionSpaceAt (
    const std::shared_ptr< State > & state,
    number t=0
) 

Parameters:

  • state the state
  • t the timestep

Returns:

the action space

Implements sdm::SolvableByHSVI::getActionSpaceAt
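
Usage sketch (assumes `solvable` is a SolvableByMDP built as in the constructor example above):

    // Query the action space available at the initial state and timestep 0.
    std::shared_ptr<sdm::State> state = solvable->getInitialState();
    std::shared_ptr<sdm::Space> action_space = solvable->getActionSpaceAt(state, 0);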

function getDiscount

virtual double sdm::SolvableByMDP::getDiscount (
    number t=0
) const

Parameters:

  • t the decision epoch (timestep)

Returns:

the discount factor

Implements sdm::SolvableByHSVI::getDiscount

function getExpectedNextValue

virtual double sdm::SolvableByMDP::getExpectedNextValue (
    const std::shared_ptr< ValueFunction > & value_function,
    const std::shared_ptr< State > & state,
    const std::shared_ptr< Action > & action,
    number t=0
) 

Parameters:

  • value_function a pointer to the value function used to perform the computation
  • state the state at which to evaluate the next expected value
  • action the action
  • t the timestep

Returns:

the expected next value

Implements sdm::SolvableByHSVI::getExpectedNextValue
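
Conceptually this computes the expectation of the value function over successor states, i.e. the sum over next states s' of p(s' | s, a) · V_{t+1}(s'). A schematic sketch follows; the successor set and the getTransitionProbability / getValueAt accessors are assumptions about MDPInterface and ValueFunction, not documented on this page:

    // Schematic only: sum the value of each hypothetical successor state,
    // weighted by its transition probability.
    double expected_value = 0.0;
    for (const auto &next_state : next_states) // hypothetical successor set
    {
        double prob = underlying_problem_->getTransitionProbability(state, action, next_state, t);
        expected_value += prob * value_function->getValueAt(next_state, t + 1);
    }
    return expected_value;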

function getInitialState

virtual std::shared_ptr< State > sdm::SolvableByMDP::getInitialState () 

Implements sdm::SolvableByHSVI::getInitialState

function getNextState

virtual Pair < std::shared_ptr< State >, double > sdm::SolvableByMDP::getNextState (
    const std::shared_ptr< ValueFunction > & value_function,
    const std::shared_ptr< State > & belief,
    const std::shared_ptr< Action > & action,
    const std::shared_ptr< Observation > & observation,
    number t
) 

Implements sdm::SolvableByHSVI::getNextState

function getObservationSpaceAt

virtual std::shared_ptr< Space > sdm::SolvableByMDP::getObservationSpaceAt (
    const std::shared_ptr< State > & state,
    const std::shared_ptr< Action > & action,
    number t
) 

Implements sdm::SolvableByHSVI::getObservationSpaceAt

function getReward

virtual double sdm::SolvableByMDP::getReward (
    const std::shared_ptr< State > & state,
    const std::shared_ptr< Action > & action,
    number t=0
) 

Parameters:

  • state the state
  • action the action
  • t the timestep

Returns:

the reward

Implements sdm::SolvableByHSVI::getReward

function getUnderlyingProblem

virtual const std::shared_ptr< MDPInterface > & sdm::SolvableByMDP::getUnderlyingProblem () const

Returns:

the underlying problem

Implements sdm::SolvableByHSVI::getUnderlyingProblem

function getWeightedDiscount

virtual double sdm::SolvableByMDP::getWeightedDiscount (
    number t
) 

Parameters:

  • t the decision epoch (timestep)

Returns:

the weighted discount factor

Implements sdm::SolvableByHSVI::getWeightedDiscount
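
A common reading of "weighted" here is the discount compounded up to timestep t; a sketch under that assumption:

    #include <cmath>

    // Sketch (assumption): the weighted discount is the one-step discount
    // factor compounded over the first t timesteps, i.e. gamma^t.
    double getWeightedDiscount(number t)
    {
        return std::pow(this->getDiscount(t), t);
    }

This is consistent with the do_excess sketch above, where the error tolerance is divided by this quantity.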

function isSerialized

virtual bool sdm::SolvableByMDP::isSerialized () const

Returns:

true if the problem is serialized, false otherwise

Implements sdm::SolvableByHSVI::isSerialized

function nextState

virtual std::shared_ptr< State > sdm::SolvableByMDP::nextState (
    const std::shared_ptr< State > & state,
    const std::shared_ptr< Action > & action,
    number t=0,
    const std::shared_ptr< HSVI > & hsvi=nullptr
) 

Parameters:

  • state the state
  • action the action
  • t the timestep
  • hsvi a pointer on the algorithm that makes the call

Returns:

the next state

Implements sdm::SolvableByHSVI::nextState

function selectNextAction

virtual Pair < std::shared_ptr< Action >, double > sdm::SolvableByMDP::selectNextAction (
    const std::shared_ptr< ValueFunction > & lb,
    const std::shared_ptr< ValueFunction > & ub,
    const std::shared_ptr< State > & s,
    number h
) 

Parameters:

  • lb the lower bound value function
  • ub the upper bound value function
  • s the current state
  • h the horizon (timestep)

Returns:

a pair containing the selected action and the associated value

Implements sdm::SolvableByHSVI::selectNextAction
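
In HSVI, the next action is typically chosen greedily with respect to the upper bound. Below is a sketch of that rule using only members documented on this page; enumerating a Space is not documented here, so the `actions` container is a hypothetical extraction from getActionSpaceAt(s, h):

    #include <limits>

    // Greedy one-step lookahead w.r.t. the upper bound `ub`.
    std::shared_ptr<sdm::Action> best_action = nullptr;
    double best_value = -std::numeric_limits<double>::infinity();
    for (const auto &action : actions) // hypothetical: elements of getActionSpaceAt(s, h)
    {
        // Immediate reward plus discounted expected upper-bound value.
        double q = this->getReward(s, action, h)
                 + this->getDiscount(h) * this->getExpectedNextValue(ub, s, action, h);
        if (q > best_value)
        {
            best_value = q;
            best_action = action;
        }
    }
    return std::make_pair(best_action, best_value);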

function setInitialState

virtual void sdm::SolvableByMDP::setInitialState (
    const std::shared_ptr< State > & state
) 

Implements sdm::SolvableByHSVI::setInitialState

Protected Attributes Documentation

variable initial_state_

std::shared_ptr<State> sdm::SolvableByMDP::initial_state_;

variable underlying_problem_

std::shared_ptr<MDPInterface> sdm::SolvableByMDP::underlying_problem_;

Protected Functions Documentation

function getUnderlyingMDP

const std::shared_ptr< MDPInterface > & sdm::SolvableByMDP::getUnderlyingMDP () const

The documentation for this class was generated from the following file: src/sdm/world/solvable_by_mdp.hpp