Class sdm::MDP

The class for Discrete Markov Decision Processes.

  • #include <mdp.hpp>

Inherits the following classes: sdm::MDPInterface, sdm::GymInterface

Inherited by the following classes: sdm::MMDP, sdm::POMDP

Public Functions

Type Name
MDP ()
MDP (const std::shared_ptr< Space > & state_space, const std::shared_ptr< Space > & action_space, const std::shared_ptr< RewardInterface > & reward_space, const std::shared_ptr< StateDynamicsInterface > & state_dynamics, const std::shared_ptr< Distribution< std::shared_ptr< State >>> & start_distribution, number horizon=0, double discount=0.99, Criterion criterion=Criterion::REW_MAX)
void generateFile (std::string)
Save the problem in a file with the given format (.xml, .json, .dpomdp, .posg or .zsposg).
virtual std::shared_ptr< Space > getActionSpace (number t=0) const
Get the action space at timestep t.
virtual std::shared_ptr< Space > getActionSpaceAt (const std::shared_ptr< Observation > & observation, number t)
Get the action space.
virtual double getDiscount (number t=0) const
Get the discount factor at timestep t.
virtual number getHorizon () const
Get the planning horizon.
virtual std::shared_ptr< State > getInternalState () const
virtual double getMaxReward (number t=0) const
virtual double getMinReward (number t=0) const
virtual number getNumAgents () const
Get the number of agents.
virtual std::shared_ptr< Action > getRandomAction (const std::shared_ptr< Observation > & observation, number t)
Get random action.
virtual std::set< std::shared_ptr< State > > getReachableStates (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, number t=0) const
Get the reachable next states.
virtual double getReward (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, number t=0) const
Get the reward of executing action a in state s at timestep t.
virtual std::shared_ptr< RewardInterface > getRewardSpace () const
Get the reward function.
virtual std::shared_ptr< Distribution< std::shared_ptr< State > > > getStartDistribution () const
Get the initial distribution over states.
virtual std::shared_ptr< StateDynamicsInterface > getStateDynamics () const
Get the state dynamics.
virtual std::shared_ptr< Space > getStateSpace (number t=0) const
Get the state space at timestep t.
virtual double getTransitionProbability (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, const std::shared_ptr< State > & next_state, number t=0) const
Get the Transition Probability object.
virtual std::shared_ptr< Observation > reset ()
Reset the environment and return initial observation.
virtual std::shared_ptr< Observation > sampleNextObservation (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, number t)
void setDiscount (double discount)
Set the discount factor.
void setHorizon (number horizon)
Set the planning horizon.
virtual void setInternalState (std::shared_ptr< State >)
virtual std::tuple< std::shared_ptr< Observation >, std::vector< double >, bool > step (std::shared_ptr< Action > action)
Do a step on the environment.
virtual std::tuple< std::shared_ptr< Observation >, std::vector< double >, bool > step (std::shared_ptr< Action > action, bool increment_timestep)
virtual std::string toJSON ()
Encodes the MDP class into a string (JSON format).
virtual std::string toStdFormat ()
Encodes the MDP class into a string (standard .posg, .dpomdp or .zsposg format).
virtual std::string toXML ()
Encodes the MDP class into a string (XML format).
virtual ~MDP ()

Public Functions inherited from sdm::MDPInterface

See sdm::MDPInterface

Type Name
virtual std::shared_ptr< Space > getActionSpace (number t) const = 0
Get the action space at timestep t.
virtual double getDiscount (number t) const = 0
Get the discount factor at timestep t.
virtual number getHorizon () const = 0
Get the planning horizon.
virtual std::shared_ptr< State > getInternalState () const = 0
virtual double getMaxReward (number t) const = 0
virtual double getMinReward (number t) const = 0
virtual number getNumAgents () const = 0
Get the number of agents.
virtual std::set< std::shared_ptr< State > > getReachableStates (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, number t) const = 0
Get reachable states.
virtual double getReward (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, number t) const = 0
Get the reward at timestep t when executing an action in a specific state.
virtual std::shared_ptr< Distribution< std::shared_ptr< State > > > getStartDistribution () const = 0
Get the initial distribution over states.
virtual std::shared_ptr< Space > getStateSpace (number t) const = 0
Get the state space at timestep t.
virtual double getTransitionProbability (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, const std::shared_ptr< State > & next_state, number t) const = 0
Get the transition probability, i.e. p(s'|s, a).
virtual void setInternalState (std::shared_ptr< State > state) = 0
virtual std::tuple< std::shared_ptr< Observation >, std::vector< double >, bool > step (std::shared_ptr< Action > action) = 0
Do a step on the environment.
virtual std::tuple< std::shared_ptr< Observation >, std::vector< double >, bool > step (std::shared_ptr< Action > action, bool increment_timestep) = 0

Public Functions inherited from sdm::GymInterface

See sdm::GymInterface

Type Name
virtual std::shared_ptr< Space > getActionSpaceAt (const std::shared_ptr< Observation > & observation, number t) = 0
Get the action space.
virtual std::shared_ptr< Action > getRandomAction (const std::shared_ptr< Observation > & observation, number t) = 0
Get random action.
virtual std::shared_ptr< Observation > reset () = 0
Reset the environment and return initial observation.
virtual std::tuple< std::shared_ptr< Observation >, std::vector< double >, bool > step (std::shared_ptr< Action > action) = 0
Do a step on the environment.

Protected Attributes

Type Name
std::shared_ptr< Space > action_space_
Criterion criterion_
int current_timestep_
double discount_
number horizon_
std::shared_ptr< State > internal_state_
number num_agents_
std::shared_ptr< RewardInterface > reward_space_
std::shared_ptr< Distribution< std::shared_ptr< State > > > start_distribution_
std::shared_ptr< StateDynamicsInterface > state_dynamics_
std::shared_ptr< Space > state_space_

Public Functions Documentation

function MDP [1/2]

sdm::MDP::MDP () 

function MDP [2/2]

sdm::MDP::MDP (
    const std::shared_ptr< Space > & state_space,
    const std::shared_ptr< Space > & action_space,
    const std::shared_ptr< RewardInterface > & reward_space,
    const std::shared_ptr< StateDynamicsInterface > & state_dynamics,
    const std::shared_ptr< Distribution < std::shared_ptr< State >>> & start_distribution,
    number horizon=0,
    double discount=0.99,
    Criterion criterion=Criterion::REW_MAX
) 
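
A minimal construction sketch follows, assuming the five building blocks (state space, action space, reward function, state dynamics and start distribution) have already been constructed elsewhere with concrete implementations of the corresponding interfaces; the helper name makeProblem and the horizon/discount values are illustrative only.

#include <memory>
#include <mdp.hpp>

// Sketch only: the component objects are assumed to be built elsewhere
// with concrete implementations of Space, RewardInterface,
// StateDynamicsInterface and Distribution.
std::shared_ptr<sdm::MDP> makeProblem(
    const std::shared_ptr<sdm::Space> &state_space,
    const std::shared_ptr<sdm::Space> &action_space,
    const std::shared_ptr<sdm::RewardInterface> &reward_function,
    const std::shared_ptr<sdm::StateDynamicsInterface> &dynamics,
    const std::shared_ptr<sdm::Distribution<std::shared_ptr<sdm::State>>> &start_distribution)
{
    // Finite-horizon problem over 10 timesteps, undiscounted,
    // maximizing the expected cumulative reward.
    return std::make_shared<sdm::MDP>(state_space, action_space,
                                      reward_function, dynamics,
                                      start_distribution,
                                      /*horizon=*/10,
                                      /*discount=*/1.0,
                                      sdm::Criterion::REW_MAX);
}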

function generateFile

void sdm::MDP::generateFile (
    std::string
) 

Parameters:

  • filename the name of the output file; its extension selects the format (.xml, .json, .dpomdp, .posg or .zsposg)
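
For instance (a sketch; mdp is an already-constructed sdm::MDP and the file names are illustrative), the extension of the given name selects the encoding:

mdp->generateFile("my_problem.json");    // JSON encoding
mdp->generateFile("my_problem.dpomdp");  // standard .dpomdp encoding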

function getActionSpace

virtual std::shared_ptr< Space > sdm::MDP::getActionSpace (
    number t=0
) const

Parameters:

  • t the timestep

Returns:

the action space

Implements sdm::MDPInterface::getActionSpace

function getActionSpaceAt

virtual std::shared_ptr< Space > sdm::MDP::getActionSpaceAt (
    const std::shared_ptr< Observation > & observation,
    number t
) 

Parameters:

  • observation the observation in consideration
  • t time step

Returns:

the action space.

Implements sdm::GymInterface::getActionSpaceAt

function getDiscount

virtual double sdm::MDP::getDiscount (
    number t=0
) const

Parameters:

  • t the timestep

Returns:

the discount factor

Implements sdm::MDPInterface::getDiscount

function getHorizon

virtual number sdm::MDP::getHorizon () const

Returns:

the planning horizon

Implements sdm::MDPInterface::getHorizon

function getInternalState

virtual std::shared_ptr< State > sdm::MDP::getInternalState () const

Implements sdm::MDPInterface::getInternalState

function getMaxReward

virtual double sdm::MDP::getMaxReward (
    number t=0
) const

Implements sdm::MDPInterface::getMaxReward

function getMinReward

virtual double sdm::MDP::getMinReward (
    number t=0
) const

Implements sdm::MDPInterface::getMinReward

function getNumAgents

virtual number sdm::MDP::getNumAgents () const

Returns:

the number of agents

Implements sdm::MDPInterface::getNumAgents

function getRandomAction

virtual std::shared_ptr< Action > sdm::MDP::getRandomAction (
    const std::shared_ptr< Observation > & observation,
    number t
) 

Parameters:

  • observation the observation in consideration.
  • t time step.

Returns:

the random action.

Implements sdm::GymInterface::getRandomAction

function getReachableStates

virtual std::set< std::shared_ptr< State > > sdm::MDP::getReachableStates (
    const std::shared_ptr< State > & state,
    const std::shared_ptr< Action > & action,
    number t=0
) const

Parameters:

  • state the state
  • action the action
  • t the timestep

Returns:

the set of reachable states

Implements sdm::MDPInterface::getReachableStates
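
As a sketch (mdp, state and action are assumed to be valid pointers built elsewhere, and the function name is illustrative), the returned set is the support of the transition distribution of the pair:

#include <iostream>
#include <memory>
#include <mdp.hpp>

// Sketch: count the successors of (state, action) at timestep t.
void printSupportSize(const std::shared_ptr<sdm::MDP> &mdp,
                      const std::shared_ptr<sdm::State> &state,
                      const std::shared_ptr<sdm::Action> &action,
                      sdm::number t)
{
    auto successors = mdp->getReachableStates(state, action, t);
    std::cout << successors.size() << " reachable next state(s)" << std::endl;
}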

function getReward

virtual double sdm::MDP::getReward (
    const std::shared_ptr< State > & state,
    const std::shared_ptr< Action > & action,
    number t=0
) const

Parameters:

  • state the state
  • action the action
  • t the timestep

Returns:

the value of the reward

Implements sdm::MDPInterface::getReward

function getRewardSpace

virtual std::shared_ptr< RewardInterface > sdm::MDP::getRewardSpace () const

Returns:

the reward function

function getStartDistribution

virtual std::shared_ptr< Distribution < std::shared_ptr< State > > > sdm::MDP::getStartDistribution () const

Returns:

the initial distribution over states

Implements sdm::MDPInterface::getStartDistribution

function getStateDynamics

virtual std::shared_ptr< StateDynamicsInterface > sdm::MDP::getStateDynamics () const

Returns:

the state dynamics interface

function getStateSpace

virtual std::shared_ptr< Space > sdm::MDP::getStateSpace (
    number t=0
) const

Parameters:

  • t the timestep

Returns:

the state space

Implements sdm::MDPInterface::getStateSpace

function getTransitionProbability

virtual double sdm::MDP::getTransitionProbability (
    const std::shared_ptr< State > & state,
    const std::shared_ptr< Action > & action,
    const std::shared_ptr< State > & next_state,
    number t=0
) const

Parameters:

  • state the current state
  • action the action executed in state
  • next_state the candidate next state
  • t the timestep

Returns:

the probability of transitioning from state to next_state when executing action, i.e. p(s'|s, a)

Implements sdm::MDPInterface::getTransitionProbability
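
The following hedged sketch combines getReachableStates, getTransitionProbability and getReward: over the reachable successors of a pair, the transition probabilities should sum to one, and the immediate reward of the pair is read from the reward function. The function name and the tolerance are illustrative.

#include <cassert>
#include <cmath>
#include <memory>
#include <mdp.hpp>

// Sketch: sanity-check the transition model of (state, action) at timestep t
// and return the immediate reward r(s, a).
double checkTransitionModel(const std::shared_ptr<sdm::MDP> &mdp,
                            const std::shared_ptr<sdm::State> &state,
                            const std::shared_ptr<sdm::Action> &action,
                            sdm::number t)
{
    double probability_mass = 0.0;
    for (const auto &next_state : mdp->getReachableStates(state, action, t))
        probability_mass += mdp->getTransitionProbability(state, action, next_state, t);
    // The probabilities over reachable successors should sum to (about) 1.
    assert(std::abs(probability_mass - 1.0) < 1e-9);
    return mdp->getReward(state, action, t);
}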

function reset

virtual std::shared_ptr< Observation > sdm::MDP::reset () 

Returns:

the initial observation

Implements sdm::GymInterface::reset

function sampleNextObservation

virtual std::shared_ptr< Observation > sdm::MDP::sampleNextObservation (
    const std::shared_ptr< State > & state,
    const std::shared_ptr< Action > & action,
    number t
) 

function setDiscount

void sdm::MDP::setDiscount (
    double discount
) 

Parameters:

  • discount the discount factor

function setHorizon

void sdm::MDP::setHorizon (
    number horizon
) 

Parameters:

  • horizon the planning horizon
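
For example (mdp being an existing sdm::MDP instance; the chosen values are illustrative):

mdp->setDiscount(0.95);  // discount factor applied when accumulating rewards
mdp->setHorizon(20);     // plan over 20 timesteps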

function setInternalState

virtual void sdm::MDP::setInternalState (
    std::shared_ptr< State >
) 

Implements sdm::MDPInterface::setInternalState

function step [1/2]

virtual std::tuple< std::shared_ptr< Observation >, std::vector< double >, bool > sdm::MDP::step (
    std::shared_ptr< Action > action
) 

Parameters:

  • action the action to execute

Returns:

the information produced, namely: the next observation, the rewards, and whether the episode is done

Implements sdm::MDPInterface::step
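
A minimal rollout sketch, using only members documented on this page and assuming a finite planning horizon (mdp is an existing instance; the function name rollout is illustrative):

#include <memory>
#include <tuple>
#include <vector>
#include <mdp.hpp>

// Sketch: roll out one episode with uniformly random actions.
void rollout(const std::shared_ptr<sdm::MDP> &mdp)
{
    std::shared_ptr<sdm::Observation> observation = mdp->reset();
    std::vector<double> rewards;
    bool is_done = false;
    for (sdm::number t = 0; t < mdp->getHorizon() && !is_done; t++)
    {
        auto action = mdp->getRandomAction(observation, t);
        std::tie(observation, rewards, is_done) = mdp->step(action);
        // `rewards` holds one entry per agent; an MDP has a single agent.
    }
}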

function step [2/2]

virtual std::tuple< std::shared_ptr< Observation >, std::vector< double >, bool > sdm::MDP::step (
    std::shared_ptr< Action > action,
    bool increment_timestep
) 

Implements sdm::MDPInterface::step

function toJSON

virtual std::string sdm::MDP::toJSON () 

function toStdFormat

virtual std::string sdm::MDP::toStdFormat () 

Returns:

the process in standard (.posg, .dpomdp or .zsposg) format

function toXML

virtual std::string sdm::MDP::toXML () 

Returns:

the process as XML
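
As a sketch (mdp is an existing instance; the function name is illustrative), the three encodings can be written to standard output:

#include <iostream>
#include <memory>
#include <mdp.hpp>

// Sketch: dump the textual encodings of the problem.
void dumpEncodings(const std::shared_ptr<sdm::MDP> &mdp)
{
    std::cout << mdp->toStdFormat() << std::endl;  // .posg / .dpomdp / .zsposg
    std::cout << mdp->toXML() << std::endl;        // XML
    std::cout << mdp->toJSON() << std::endl;       // JSON
}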

function ~MDP

virtual sdm::MDP::~MDP () 

Protected Attributes Documentation

variable action_space_

std::shared_ptr<Space> sdm::MDP::action_space_;

variable criterion_

Criterion sdm::MDP::criterion_;

variable current_timestep_

int sdm::MDP::current_timestep_;

variable discount_

double sdm::MDP::discount_;

variable horizon_

number sdm::MDP::horizon_;

variable internal_state_

std::shared_ptr<State> sdm::MDP::internal_state_;

variable num_agents_

number sdm::MDP::num_agents_;

variable reward_space_

std::shared_ptr<RewardInterface> sdm::MDP::reward_space_;

variable start_distribution_

std::shared_ptr<Distribution<std::shared_ptr<State> > > sdm::MDP::start_distribution_;

variable state_dynamics_

std::shared_ptr<StateDynamicsInterface> sdm::MDP::state_dynamics_;

variable state_space_

std::shared_ptr<Space> sdm::MDP::state_space_;

Friends Documentation

friend operator<<

inline friend std::ostream & sdm::MDP::operator<< (
    std::ostream & os,
    MDP & model
) 

The documentation for this class was generated from the following file: src/sdm/world/mdp.hpp