Class sdm::MDPInterface
The class for Discrete Markov Decision Processes.
#include <mdp_interface.hpp>
Inherits the following classes: sdm::GymInterface
Inherited by the following classes: sdm::MDP, sdm::MMDPInterface, sdm::POMDPInterface
Public Functions
Type | Name |
---|---|
virtual std::shared_ptr< Space > | getActionSpace (number t) const = 0 Get the action space at timestep t. |
virtual double | getDiscount (number t) const = 0 Get the discount factor at timestep t. |
virtual number | getHorizon () const = 0 Get the planning horizon. |
virtual std::shared_ptr< State > | getInternalState () const = 0 Get the internal state. |
virtual double | getMaxReward (number t) const = 0 Get the maximum reward at timestep t. |
virtual double | getMinReward (number t) const = 0 Get the minimum reward at timestep t. |
virtual number | getNumAgents () const = 0 Get the number of agents. |
virtual std::set< std::shared_ptr< State > > | getReachableStates (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, number t) const = 0 Get reachable states. |
virtual double | getReward (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, number t) const = 0 Get the reward at timestep t when executing an action in a specific state. |
virtual std::shared_ptr< Distribution< std::shared_ptr< State > > > | getStartDistribution () const = 0 Get the initial distribution over states. |
virtual std::shared_ptr< Space > | getStateSpace (number t) const = 0 Get the state space at timestep t. |
virtual double | getTransitionProbability (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, const std::shared_ptr< State > & next_state, number t) const = 0 Get the transition probability, i.e. p(s' \| s, a), at timestep t. |
virtual void | setInternalState (std::shared_ptr< State > state) = 0 Set the internal state. |
virtual std::tuple< std::shared_ptr< Observation >, std::vector< double >, bool > | step (std::shared_ptr< Action > action) = 0 Do a step on the environment. |
virtual std::tuple< std::shared_ptr< Observation >, std::vector< double >, bool > | step (std::shared_ptr< Action > action, bool increment_timestep) = 0 |
Public Functions inherited from sdm::GymInterface
Type | Name |
---|---|
virtual std::shared_ptr< Space > | getActionSpaceAt (const std::shared_ptr< Observation > & observation, number t) = 0 Get the action space. |
virtual std::shared_ptr< Action > | getRandomAction (const std::shared_ptr< Observation > & observation, number t) = 0 Get random action. |
virtual std::shared_ptr< Observation > | reset () = 0 Reset the environment and return initial observation. |
virtual std::tuple< std::shared_ptr< Observation >, std::vector< double >, bool > | step (std::shared_ptr< Action > action) = 0 Do a step on the environment. |
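The environment side of the interface (reset, getRandomAction, step) can be used directly for simulation. The following is a minimal rollout sketch, assuming `mdp` points to a concrete implementation of sdm::MDPInterface (for example an sdm::MDP) with a finite horizon; how that instance is constructed is not shown here.

```cpp
#include <mdp_interface.hpp>
#include <memory>
#include <tuple>
#include <vector>

// Sketch: run one episode with random actions and return the cumulated reward.
double randomRollout(const std::shared_ptr<sdm::MDPInterface> &mdp)
{
    // Reset the environment and get the initial observation.
    auto observation = mdp->reset();

    double total_reward = 0.0;
    bool is_done = false;

    for (sdm::number t = 0; t < mdp->getHorizon() && !is_done; t++)
    {
        // Pick a random admissible action for the current observation.
        auto action = mdp->getRandomAction(observation, t);

        // step returns (next observation, rewards, episode done).
        std::vector<double> rewards;
        std::tie(observation, rewards, is_done) = mdp->step(action);

        total_reward += rewards[0];
    }
    return total_reward;
}
```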
Public Functions Documentation
function getActionSpace
virtual std::shared_ptr< Space > sdm::MDPInterface::getActionSpace (
number t
) const = 0
Parameters:
t
the timestep
Returns:
the action space
function getDiscount
virtual double sdm::MDPInterface::getDiscount (
number t
) const = 0
Parameters:
t
the timestep
Returns:
the discount factor
function getHorizon
virtual number sdm::MDPInterface::getHorizon () const = 0
Returns:
the planning horizon
function getInternalState
virtual std::shared_ptr< State > sdm::MDPInterface::getInternalState () const = 0
function getMaxReward
virtual double sdm::MDPInterface::getMaxReward (
number t
) const = 0
function getMinReward
virtual double sdm::MDPInterface::getMinReward (
number t
) const = 0
function getNumAgents
virtual number sdm::MDPInterface::getNumAgents () const = 0
Returns:
the number of agents
function getReachableStates
virtual std::set< std::shared_ptr< State > > sdm::MDPInterface::getReachableStates (
const std::shared_ptr< State > & state,
const std::shared_ptr< Action > & action,
number t
) const = 0
Parameters:
state
the current state
action
the current action
Returns:
the set of reachable states
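The returned std::set can be iterated directly. As an illustration, the sketch below (with `mdp`, `state`, `action` and `t` assumed to be in scope) sums the transition probabilities over the reachable successors, which should be close to one for a well-formed model.

```cpp
// Sketch: sum the transition probabilities of the reachable successors of
// (state, action) at timestep t. `mdp`, `state`, `action`, `t` assumed given.
double mass = 0.0;
for (const auto &next_state : mdp->getReachableStates(state, action, t))
{
    mass += mdp->getTransitionProbability(state, action, next_state, t);
}
// For a well-formed model, `mass` should be numerically close to 1.0.
```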
function getReward
virtual double sdm::MDPInterface::getReward (
const std::shared_ptr< State > & state,
const std::shared_ptr< Action > & action,
number t
) const = 0
Parameters:
state
the current state
action
the action
t
the timestep
Returns:
the reward
function getStartDistribution
virtual std::shared_ptr< Distribution < std::shared_ptr< State > > > sdm::MDPInterface::getStartDistribution () const = 0
Returns:
the initial distribution over states
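A typical use is to draw an initial state from this distribution. The sketch below assumes that sdm::Distribution exposes a sample() method; refer to the Distribution documentation for its exact interface.

```cpp
// Sketch: draw an initial state from the start distribution.
// Assumes Distribution provides a sample() method (check its documentation).
std::shared_ptr<sdm::Distribution<std::shared_ptr<sdm::State>>> start_distribution = mdp->getStartDistribution();
std::shared_ptr<sdm::State> initial_state = start_distribution->sample();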
function getStateSpace
virtual std::shared_ptr< Space > sdm::MDPInterface::getStateSpace (
number t
) const = 0
Parameters:
t
the timestep
Returns:
the state space
function getTransitionProbability
virtual double sdm::MDPInterface::getTransitionProbability (
const std::shared_ptr< State > & state,
const std::shared_ptr< Action > & action,
const std::shared_ptr< State > & next_state,
number t
) const = 0
Parameters:
state
the current state
action
the action
next_state
the next state
t
the timestep
Returns:
the probability
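Together with getReachableStates, getReward and getDiscount, this accessor is sufficient to write a one-step lookahead. The sketch below computes a Q-value from a hypothetical `value` map holding estimated values of successor states; that map is an assumption for illustration and is not part of MDPInterface.

```cpp
#include <map>
#include <memory>

// Sketch of a one-step lookahead (Q-value) built from the accessors above.
// `value` is a hypothetical map from successor states to their estimated
// values at timestep t+1, keyed on pointer identity for simplicity.
double qValue(const std::shared_ptr<sdm::MDPInterface> &mdp,
              const std::shared_ptr<sdm::State> &state,
              const std::shared_ptr<sdm::Action> &action,
              const std::map<std::shared_ptr<sdm::State>, double> &value,
              sdm::number t)
{
    // Immediate reward for executing `action` in `state` at timestep t.
    double q = mdp->getReward(state, action, t);

    // Add the discounted expected value of the reachable successor states.
    for (const auto &next_state : mdp->getReachableStates(state, action, t))
    {
        double p = mdp->getTransitionProbability(state, action, next_state, t);
        auto it = value.find(next_state);
        if (it != value.end())
        {
            q += mdp->getDiscount(t) * p * it->second;
        }
    }
    return q;
}
```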
function setInternalState
virtual void sdm::MDPInterface::setInternalState (
std::shared_ptr< State > state
) = 0
function step [1/2]
virtual std::tuple< std::shared_ptr< Observation >, std::vector< double >, bool > sdm::MDPInterface::step (
std::shared_ptr< Action > action
) = 0
Parameters:
action
the action to execute
Returns:
the resulting information: the next observation, the rewards, and whether the episode is done
Implements sdm::GymInterface::step
function step [2/2]
virtual std::tuple< std::shared_ptr< Observation >, std::vector< double >, bool > sdm::MDPInterface::step (
std::shared_ptr< Action > action,
bool increment_timestep
) = 0
The documentation for this class was generated from the following file: src/sdm/world/base/mdp_interface.hpp