Class sdm::MPOMDP
The class for Discrete Multi-agent Partially Observable Markov Decision Processes (MPOMDPs).
#include <mpomdp.hpp>
Inherits the following classes: sdm::MPOMDPInterface, sdm::POMDP, sdm::MMDP
Inherited by the following classes: sdm::NetworkedDistributedPOMDP
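The class can be driven through the inherited GymInterface methods (reset, step, getRandomAction). Below is a minimal rollout sketch, not a definitive recipe: it assumes the header is included as shown above and that a `std::shared_ptr<sdm::MPOMDP>` has already been constructed elsewhere (e.g. with the constructor documented further down).

```cpp
#include <memory>
#include <mpomdp.hpp>  // as listed above

// Run one episode on an already constructed MPOMDP (built elsewhere).
void rollout(const std::shared_ptr<sdm::MPOMDP> &mpomdp)
{
    auto observation = mpomdp->reset();  // initial joint observation
    for (sdm::number t = 0; t < mpomdp->getHorizon(); t++)
    {
        // Pick a random joint action for the current joint observation.
        auto action = mpomdp->getRandomAction(observation, t);

        // Apply it: step returns the next joint observation, a vector of
        // rewards (typically one entry per reward signal/agent) and a done flag.
        auto [next_observation, rewards, done] = mpomdp->step(action);

        observation = next_observation;
        if (done)
            break;
    }
}
```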
Public Functions
Type | Name |
---|---|
MPOMDP () | |
MPOMDP (const std::shared_ptr< Space > & state_space, const std::shared_ptr< Space > & action_space, const std::shared_ptr< Space > & obs_space, const std::shared_ptr< RewardInterface > & reward, const std::shared_ptr< StateDynamicsInterface > & state_dynamics, const std::shared_ptr< ObservationDynamicsInterface > & obs_dynamics, const std::shared_ptr< Distribution< std::shared_ptr< State >>> & start_distrib, number horizon=0, double discount=0.99, Criterion criterion=Criterion::REW_MAX) | |
virtual std::shared_ptr< Space > | getObservationSpace (number agent_id, number t) const Get the observation space of agent i at timestep t. |
virtual std::shared_ptr< Space > | getObservationSpace (number t=0) const Get the observation space at timestep t. |
virtual std::string | toStdFormat () Encodes MDP class into a string (standard .posg or .dpomdp or .zsposg format). |
virtual | ~MPOMDP () |
Public Functions inherited from sdm::MPOMDPInterface
Type | Name |
---|---|
virtual std::shared_ptr< Space > | getObservationSpace (number agent_id, number t) const = 0 Get the observation space of agent i at timestep t. |
virtual std::shared_ptr< Space > | getObservationSpace (number t) const = 0 Get the observation space at timestep t. |
Public Functions inherited from sdm::MMDPInterface
Type | Name |
---|---|
virtual std::shared_ptr< Space > | getActionSpace (number agent_id, number t) const = 0 Get the action space of agent i at timestep t. |
virtual std::shared_ptr< Space > | getActionSpace (number t) const = 0 Get the action space at timestep t. |
Public Functions inherited from sdm::MDPInterface
Type | Name |
---|---|
virtual std::shared_ptr< Space > | getActionSpace (number t) const = 0 Get the action space at timestep t. |
virtual double | getDiscount (number t) const = 0 Get the discount factor at timestep t. |
virtual number | getHorizon () const = 0 Get the planning horizon. |
virtual std::shared_ptr< State > | getInternalState () const = 0 |
virtual double | getMaxReward (number t) const = 0 |
virtual double | getMinReward (number t) const = 0 |
virtual number | getNumAgents () const = 0 Get the number of agents. |
virtual std::set< std::shared_ptr< State > > | getReachableStates (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, number t) const = 0 Get reachable states. |
virtual double | getReward (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, number t) const = 0 Get the reward at timestep t when executing an action in a specific state. |
virtual std::shared_ptr< Distribution< std::shared_ptr< State > > > | getStartDistribution () const = 0 Get the initial distribution over states. |
virtual std::shared_ptr< Space > | getStateSpace (number t) const = 0 Get the state space at timestep t. |
virtual double | getTransitionProbability (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, const std::shared_ptr< State > & next_state, number t) const = 0 Get the transition probability, i.e. p(s'\|s, a). |
virtual void | setInternalState (std::shared_ptr< State > state) = 0 |
virtual std::tuple< std::shared_ptr< Observation >, std::vector< double >, bool > | step (std::shared_ptr< Action > action) = 0 Do a step on the environment. |
virtual std::tuple< std::shared_ptr< Observation >, std::vector< double >, bool > | step (std::shared_ptr< Action > action, bool increment_timestep) = 0 |
Public Functions inherited from sdm::GymInterface
Type | Name |
---|---|
virtual std::shared_ptr< Space > | getActionSpaceAt (const std::shared_ptr< Observation > & observation, number t) = 0 Get the action space. |
virtual std::shared_ptr< Action > | getRandomAction (const std::shared_ptr< Observation > & observation, number t) = 0 Get random action. |
virtual std::shared_ptr< Observation > | reset () = 0 Reset the environment and return initial observation. |
virtual std::tuple< std::shared_ptr< Observation >, std::vector< double >, bool > | step (std::shared_ptr< Action > action) = 0 Do a step on the environment. |
Public Functions inherited from sdm::POMDPInterface
Type | Name |
---|---|
virtual double | getDynamics (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, const std::shared_ptr< State > & next_state, const std::shared_ptr< Observation > & observation, number t) const = 0 Get the dynamics, i.e. p(s', o\|s, a). |
virtual double | getObservationProbability (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, const std::shared_ptr< State > & next_state, const std::shared_ptr< Observation > & observation, number t) const = 0 Get the observation probability, i.e. p(o\|a, s'). |
virtual std::shared_ptr< Space > | getObservationSpace (number t) const = 0 Get the observation space at timestep t. |
virtual std::set< std::shared_ptr< Observation > > | getReachableObservations (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, const std::shared_ptr< State > & next_state, number t) const = 0 Get reachable observations. |
Public Functions inherited from sdm::MDPInterface
Type | Name |
---|---|
virtual std::shared_ptr< Space > | getActionSpace (number t) const = 0 Get the action space at timestep t. |
virtual double | getDiscount (number t) const = 0 Get the discount factor at timestep t. |
virtual number | getHorizon () const = 0 Get the planning horizon. |
virtual std::shared_ptr< State > | getInternalState () const = 0 |
virtual double | getMaxReward (number t) const = 0 |
virtual double | getMinReward (number t) const = 0 |
virtual number | getNumAgents () const = 0 Get the number of agents. |
virtual std::set< std::shared_ptr< State > > | getReachableStates (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, number t) const = 0 Get reachable states. |
virtual double | getReward (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, number t) const = 0 Get the reward at timestep t when executing an action in a specific state. |
virtual std::shared_ptr< Distribution< std::shared_ptr< State > > > | getStartDistribution () const = 0 Get the initial distribution over states. |
virtual std::shared_ptr< Space > | getStateSpace (number t) const = 0 Get the state space at timestep t. |
virtual double | getTransitionProbability (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, const std::shared_ptr< State > & next_state, number t) const = 0 Get the transition probability, i.e. p(s'\|s, a). |
virtual void | setInternalState (std::shared_ptr< State > state) = 0 |
virtual std::tuple< std::shared_ptr< Observation >, std::vector< double >, bool > | step (std::shared_ptr< Action > action) = 0 Do a step on the environment. |
virtual std::tuple< std::shared_ptr< Observation >, std::vector< double >, bool > | step (std::shared_ptr< Action > action, bool increment_timestep) = 0 |
Public Functions inherited from sdm::GymInterface
Type | Name |
---|---|
virtual std::shared_ptr< Space > | getActionSpaceAt (const std::shared_ptr< Observation > & observation, number t) = 0 Get the action space. |
virtual std::shared_ptr< Action > | getRandomAction (const std::shared_ptr< Observation > & observation, number t) = 0 Get random action. |
virtual std::shared_ptr< Observation > | reset () = 0 Reset the environment and return initial observation. |
virtual std::tuple< std::shared_ptr< Observation >, std::vector< double >, bool > | step (std::shared_ptr< Action > action) = 0 Do a step on the environment. |
Public Functions inherited from sdm::POMDP
See sdm::POMDP
Type | Name |
---|---|
POMDP () | |
POMDP (const std::shared_ptr< Space > & state_space, const std::shared_ptr< Space > & action_space, const std::shared_ptr< Space > & obs_space, const std::shared_ptr< RewardInterface > & reward, const std::shared_ptr< StateDynamicsInterface > & state_dynamics, const std::shared_ptr< ObservationDynamicsInterface > & obs_dynamics, const std::shared_ptr< Distribution< std::shared_ptr< State >>> & start_distrib, number horizon=0, double discount=0.99, Criterion criterion=Criterion::REW_MAX) | |
virtual double | getDynamics (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, const std::shared_ptr< State > & next_state, const std::shared_ptr< Observation > & observation, number t=0) const Get the dynamics, i.e. p(s', o\|s, a). |
std::shared_ptr< ObservationDynamicsInterface > | getObservationDynamics () const |
virtual double | getObservationProbability (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, const std::shared_ptr< State > & next_state, const std::shared_ptr< Observation > & observation, number t=0) const Get the observation probability, i.e. p(o\|a, s'). |
virtual std::shared_ptr< Space > | getObservationSpace (number t=0) const Get the observation space at timestep t. |
virtual std::set< std::shared_ptr< Observation > > | getReachableObservations (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, const std::shared_ptr< State > & next_state, number t) const Get reachable observations. |
virtual std::shared_ptr< Observation > | sampleNextObservation (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, number t) |
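The probabilistic accessors above are expected to relate as in a standard POMDP, with the joint dynamics factoring into a transition term and an observation term, p(s', o\|s, a) = p(s'\|s, a) · p(o\|a, s'). A small sanity-check sketch under that assumption; the state, action and observation handles are assumed to be obtained elsewhere (e.g. from the corresponding spaces):

```cpp
#include <cassert>
#include <cmath>
#include <memory>
#include <mpomdp.hpp>  // as listed above

// Check p(s', o | s, a) == p(s' | s, a) * p(o | a, s') for one tuple.
void check_dynamics(const std::shared_ptr<sdm::MPOMDP> &mpomdp,
                    const std::shared_ptr<sdm::State> &state,
                    const std::shared_ptr<sdm::Action> &action,
                    const std::shared_ptr<sdm::State> &next_state,
                    const std::shared_ptr<sdm::Observation> &observation,
                    sdm::number t)
{
    double p_transition  = mpomdp->getTransitionProbability(state, action, next_state, t);
    double p_observation = mpomdp->getObservationProbability(state, action, next_state, observation, t);
    double p_dynamics    = mpomdp->getDynamics(state, action, next_state, observation, t);

    // Assumed factorization; tolerance accounts for floating-point rounding.
    assert(std::abs(p_dynamics - p_transition * p_observation) < 1e-9);
}
```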
Public Functions inherited from sdm::MDP
See sdm::MDP
Type | Name |
---|---|
MDP () | |
MDP (const std::shared_ptr< Space > & state_space, const std::shared_ptr< Space > & action_space, const std::shared_ptr< RewardInterface > & reward_space, const std::shared_ptr< StateDynamicsInterface > & state_dynamics, const std::shared_ptr< Distribution< std::shared_ptr< State >>> & start_distribution, number horizon=0, double discount=0.99, Criterion criterion=Criterion::REW_MAX) | |
void | generateFile (std::string) Save problem in file with given format (.xml, .json or .{dpomdp, posg, zsposg}). |
virtual std::shared_ptr< Space > | getActionSpace (number t=0) const Get the action space at timestep t. |
virtual std::shared_ptr< Space > | getActionSpaceAt (const std::shared_ptr< Observation > & observation, number t) Get the action space. |
virtual double | getDiscount (number t=0) const Get the discount factor at timestep t. |
virtual number | getHorizon () const Get the planning horizon. |
virtual std::shared_ptr< State > | getInternalState () const |
virtual double | getMaxReward (number t=0) const |
virtual double | getMinReward (number t=0) const |
virtual number | getNumAgents () const Get the number of agents. |
virtual std::shared_ptr< Action > | getRandomAction (const std::shared_ptr< Observation > & observation, number t) Get random action. |
virtual std::set< std::shared_ptr< State > > | getReachableStates (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, number t=0) const Get the reachable next states. |
virtual double | getReward (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, number t=0) const Get the reward of executing action a in state s at timestep t. |
virtual std::shared_ptr< RewardInterface > | getRewardSpace () const Get the reward function. |
virtual std::shared_ptr< Distribution< std::shared_ptr< State > > > | getStartDistribution () const Get the initial distribution over states. |
virtual std::shared_ptr< StateDynamicsInterface > | getStateDynamics () const Get the state dynamics. |
virtual std::shared_ptr< Space > | getStateSpace (number t=0) const Get the state space at timestep t. |
virtual double | getTransitionProbability (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, const std::shared_ptr< State > & next_state, number t=0) const Get the Transition Probability object. |
virtual std::shared_ptr< Observation > | reset () Reset the environment and return initial observation. |
virtual std::shared_ptr< Observation > | sampleNextObservation (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, number t) |
void | setDiscount (double discount) Set the discount factor. |
void | setHorizon (number horizon) Set the planning horizon. |
virtual void | setInternalState (std::shared_ptr< State >) |
virtual std::tuple< std::shared_ptr< Observation >, std::vector< double >, bool > | step (std::shared_ptr< Action > action) Do a step on the environment. |
virtual std::tuple< std::shared_ptr< Observation >, std::vector< double >, bool > | step (std::shared_ptr< Action > action, bool increment_timestep) |
virtual std::string | toJSON () Encodes MDP class into a string (JSON format). |
virtual std::string | toStdFormat () Encodes MDP class into a string (standard .posg or .dpomdp or .zsposg format). |
virtual std::string | toXML () Encodes MDP class into a string (XML format). |
virtual | ~MDP () |
Public Functions inherited from sdm::MDPInterface
Type | Name |
---|---|
virtual std::shared_ptr< Space > | getActionSpace (number t) const = 0 Get the action space at timestep t. |
virtual double | getDiscount (number t) const = 0 Get the discount factor at timestep t. |
virtual number | getHorizon () const = 0 Get the planning horizon. |
virtual std::shared_ptr< State > | getInternalState () const = 0 |
virtual double | getMaxReward (number t) const = 0 |
virtual double | getMinReward (number t) const = 0 |
virtual number | getNumAgents () const = 0 Get the number of agents. |
virtual std::set< std::shared_ptr< State > > | getReachableStates (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, number t) const = 0 Get reachable states. |
virtual double | getReward (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, number t) const = 0 Get the reward at timestep t when executing an action in a specific state. |
virtual std::shared_ptr< Distribution< std::shared_ptr< State > > > | getStartDistribution () const = 0 Get the initial distribution over states. |
virtual std::shared_ptr< Space > | getStateSpace (number t) const = 0 Get the state space at timestep t. |
virtual double | getTransitionProbability (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, const std::shared_ptr< State > & next_state, number t) const = 0 Get the transition probability, i.e. p(s'\|s, a). |
virtual void | setInternalState (std::shared_ptr< State > state) = 0 |
virtual std::tuple< std::shared_ptr< Observation >, std::vector< double >, bool > | step (std::shared_ptr< Action > action) = 0 Do a step on the environment. |
virtual std::tuple< std::shared_ptr< Observation >, std::vector< double >, bool > | step (std::shared_ptr< Action > action, bool increment_timestep) = 0 |
Public Functions inherited from sdm::GymInterface
Type | Name |
---|---|
virtual std::shared_ptr< Space > | getActionSpaceAt (const std::shared_ptr< Observation > & observation, number t) = 0 Get the action space. |
virtual std::shared_ptr< Action > | getRandomAction (const std::shared_ptr< Observation > & observation, number t) = 0 Get random action. |
virtual std::shared_ptr< Observation > | reset () = 0 Reset the environment and return initial observation. |
virtual std::tuple< std::shared_ptr< Observation >, std::vector< double >, bool > | step (std::shared_ptr< Action > action) = 0 Do a step on the environment. |
Public Functions inherited from sdm::GymInterface
Type | Name |
---|---|
virtual std::shared_ptr< Space > | getActionSpaceAt (const std::shared_ptr< Observation > & observation, number t) = 0 Get the action space. |
virtual std::shared_ptr< Action > | getRandomAction (const std::shared_ptr< Observation > & observation, number t) = 0 Get random action. |
virtual std::shared_ptr< Observation > | reset () = 0 Reset the environment and return initial observation. |
virtual std::tuple< std::shared_ptr< Observation >, std::vector< double >, bool > | step (std::shared_ptr< Action > action) = 0 Do a step on the environment. |
Public Functions inherited from sdm::POMDPInterface
Type | Name |
---|---|
virtual double | getDynamics (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, const std::shared_ptr< State > & next_state, const std::shared_ptr< Observation > & observation, number t) const = 0 Get the dynamics, i.e. p(s', o\|s, a). |
virtual double | getObservationProbability (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, const std::shared_ptr< State > & next_state, const std::shared_ptr< Observation > & observation, number t) const = 0 Get the observation probability, i.e. p(o\|a, s'). |
virtual std::shared_ptr< Space > | getObservationSpace (number t) const = 0 Get the observation space at timestep t. |
virtual std::set< std::shared_ptr< Observation > > | getReachableObservations (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, const std::shared_ptr< State > & next_state, number t) const = 0 Get reachable observations. |
Public Functions inherited from sdm::MDPInterface
Type | Name |
---|---|
virtual std::shared_ptr< Space > | getActionSpace (number t) const = 0 Get the action space at timestep t. |
virtual double | getDiscount (number t) const = 0 Get the discount factor at timestep t. |
virtual number | getHorizon () const = 0 Get the planning horizon. |
virtual std::shared_ptr< State > | getInternalState () const = 0 |
virtual double | getMaxReward (number t) const = 0 |
virtual double | getMinReward (number t) const = 0 |
virtual number | getNumAgents () const = 0 Get the number of agents. |
virtual std::set< std::shared_ptr< State > > | getReachableStates (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, number t) const = 0 Get reachable states. |
virtual double | getReward (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, number t) const = 0 Get the reward at timestep t when executing an action in a specific state. |
virtual std::shared_ptr< Distribution< std::shared_ptr< State > > > | getStartDistribution () const = 0 Get the initial distribution over states. |
virtual std::shared_ptr< Space > | getStateSpace (number t) const = 0 Get the state space at timestep t. |
virtual double | getTransitionProbability (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, const std::shared_ptr< State > & next_state, number t) const = 0 Get the transition probability, i.e. p(s'\|s, a). |
virtual void | setInternalState (std::shared_ptr< State > state) = 0 |
virtual std::tuple< std::shared_ptr< Observation >, std::vector< double >, bool > | step (std::shared_ptr< Action > action) = 0 Do a step on the environment. |
virtual std::tuple< std::shared_ptr< Observation >, std::vector< double >, bool > | step (std::shared_ptr< Action > action, bool increment_timestep) = 0 |
Public Functions inherited from sdm::GymInterface
Type | Name |
---|---|
virtual std::shared_ptr< Space > | getActionSpaceAt (const std::shared_ptr< Observation > & observation, number t) = 0 Get the action space. |
virtual std::shared_ptr< Action > | getRandomAction (const std::shared_ptr< Observation > & observation, number t) = 0 Get random action. |
virtual std::shared_ptr< Observation > | reset () = 0 Reset the environment and return initial observation. |
virtual std::tuple< std::shared_ptr< Observation >, std::vector< double >, bool > | step (std::shared_ptr< Action > action) = 0 Do a step on the environment. |
Public Functions inherited from sdm::MMDP
See sdm::MMDP
Type | Name |
---|---|
MMDP () | |
MMDP (const std::shared_ptr< Space > & state_space, const std::shared_ptr< Space > & action_space, const std::shared_ptr< RewardInterface > & reward, const std::shared_ptr< StateDynamicsInterface > & state_dynamics, const std::shared_ptr< Distribution< std::shared_ptr< State >>> & start_distrib, number horizon=0, double discount=0.99, Criterion criterion=Criterion::REW_MAX) | |
virtual std::shared_ptr< Space > | getActionSpace (number t=0) const Get the action space at timestep t. |
virtual std::shared_ptr< Space > | getActionSpace (number agent_id, number t) const Get the action space of agent i at timestep t. |
virtual std::string | toStdFormat () Encodes MDP class into a string (standard .posg or .dpomdp or .zsposg format). |
Public Functions inherited from sdm::MDP
See sdm::MDP
Type | Name |
---|---|
MDP () | |
MDP (const std::shared_ptr< Space > & state_space, const std::shared_ptr< Space > & action_space, const std::shared_ptr< RewardInterface > & reward_space, const std::shared_ptr< StateDynamicsInterface > & state_dynamics, const std::shared_ptr< Distribution< std::shared_ptr< State >>> & start_distribution, number horizon=0, double discount=0.99, Criterion criterion=Criterion::REW_MAX) | |
void | generateFile (std::string) Save problem in file with given format (.xml, .json or .{dpomdp, posg, zsposg}). |
virtual std::shared_ptr< Space > | getActionSpace (number t=0) const Get the action space at timestep t. |
virtual std::shared_ptr< Space > | getActionSpaceAt (const std::shared_ptr< Observation > & observation, number t) Get the action space. |
virtual double | getDiscount (number t=0) const Get the discount factor at timestep t. |
virtual number | getHorizon () const Get the planning horizon. |
virtual std::shared_ptr< State > | getInternalState () const |
virtual double | getMaxReward (number t=0) const |
virtual double | getMinReward (number t=0) const |
virtual number | getNumAgents () const Get the number of agents. |
virtual std::shared_ptr< Action > | getRandomAction (const std::shared_ptr< Observation > & observation, number t) Get random action. |
virtual std::set< std::shared_ptr< State > > | getReachableStates (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, number t=0) const Get the reachable next states. |
virtual double | getReward (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, number t=0) const Get the reward of executing action a in state s at timestep t. |
virtual std::shared_ptr< RewardInterface > | getRewardSpace () const Get the reward function. |
virtual std::shared_ptr< Distribution< std::shared_ptr< State > > > | getStartDistribution () const Get the initial distribution over states. |
virtual std::shared_ptr< StateDynamicsInterface > | getStateDynamics () const Get the state dynamics. |
virtual std::shared_ptr< Space > | getStateSpace (number t=0) const Get the state space at timestep t. |
virtual double | getTransitionProbability (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, const std::shared_ptr< State > & next_state, number t=0) const Get the Transition Probability object. |
virtual std::shared_ptr< Observation > | reset () Reset the environment and return initial observation. |
virtual std::shared_ptr< Observation > | sampleNextObservation (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, number t) |
void | setDiscount (double discount) Set the discount factor. |
void | setHorizon (number horizon) Set the planning horizon. |
virtual void | setInternalState (std::shared_ptr< State >) |
virtual std::tuple< std::shared_ptr< Observation >, std::vector< double >, bool > | step (std::shared_ptr< Action > action) Do a step on the environment. |
virtual std::tuple< std::shared_ptr< Observation >, std::vector< double >, bool > | step (std::shared_ptr< Action > action, bool increment_timestep) |
virtual std::string | toJSON () Encodes MDP class into a string (JSON format). |
virtual std::string | toStdFormat () Encodes MDP class into a string (standard .posg or .dpomdp or .zsposg format). |
virtual std::string | toXML () Encodes MDP class into a string (XML format). |
virtual | ~MDP () |
Public Functions inherited from sdm::MDPInterface
Type | Name |
---|---|
virtual std::shared_ptr< Space > | getActionSpace (number t) const = 0 Get the action space at timestep t. |
virtual double | getDiscount (number t) const = 0 Get the discount factor at timestep t. |
virtual number | getHorizon () const = 0 Get the planning horizon. |
virtual std::shared_ptr< State > | getInternalState () const = 0 |
virtual double | getMaxReward (number t) const = 0 |
virtual double | getMinReward (number t) const = 0 |
virtual number | getNumAgents () const = 0 Get the number of agents. |
virtual std::set< std::shared_ptr< State > > | getReachableStates (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, number t) const = 0 Get reachable states. |
virtual double | getReward (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, number t) const = 0 Get the reward at timestep t when executing an action in a specific state. |
virtual std::shared_ptr< Distribution< std::shared_ptr< State > > > | getStartDistribution () const = 0 Get the initial distribution over states. |
virtual std::shared_ptr< Space > | getStateSpace (number t) const = 0 Get the state space at timestep t. |
virtual double | getTransitionProbability (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, const std::shared_ptr< State > & next_state, number t) const = 0 Get the transition probability, i.e. p(s'\|s, a). |
virtual void | setInternalState (std::shared_ptr< State > state) = 0 |
virtual std::tuple< std::shared_ptr< Observation >, std::vector< double >, bool > | step (std::shared_ptr< Action > action) = 0 Do a step on the environment. |
virtual std::tuple< std::shared_ptr< Observation >, std::vector< double >, bool > | step (std::shared_ptr< Action > action, bool increment_timestep) = 0 |
Public Functions inherited from sdm::GymInterface
Type | Name |
---|---|
virtual std::shared_ptr< Space > | getActionSpaceAt (const std::shared_ptr< Observation > & observation, number t) = 0 Get the action space. |
virtual std::shared_ptr< Action > | getRandomAction (const std::shared_ptr< Observation > & observation, number t) = 0 Get random action. |
virtual std::shared_ptr< Observation > | reset () = 0 Reset the environment and return initial observation. |
virtual std::tuple< std::shared_ptr< Observation >, std::vector< double >, bool > | step (std::shared_ptr< Action > action) = 0 Do a step on the environment. |
Public Functions inherited from sdm::GymInterface
Type | Name |
---|---|
virtual std::shared_ptr< Space > | getActionSpaceAt (const std::shared_ptr< Observation > & observation, number t) = 0 Get the action space. |
virtual std::shared_ptr< Action > | getRandomAction (const std::shared_ptr< Observation > & observation, number t) = 0 Get random action. |
virtual std::shared_ptr< Observation > | reset () = 0 Reset the environment and return initial observation. |
virtual std::tuple< std::shared_ptr< Observation >, std::vector< double >, bool > | step (std::shared_ptr< Action > action) = 0 Do a step on the environment. |
Public Functions inherited from sdm::MMDPInterface
Type | Name |
---|---|
virtual std::shared_ptr< Space > | getActionSpace (number agent_id, number t) const = 0 Get the action space of agent i at timestep t. |
virtual std::shared_ptr< Space > | getActionSpace (number t) const = 0 Get the action space at timestep t. |
Public Functions inherited from sdm::MDPInterface
Type | Name |
---|---|
virtual std::shared_ptr< Space > | getActionSpace (number t) const = 0 Get the action space at timestep t. |
virtual double | getDiscount (number t) const = 0 Get the discount factor at timestep t. |
virtual number | getHorizon () const = 0 Get the planning horizon. |
virtual std::shared_ptr< State > | getInternalState () const = 0 |
virtual double | getMaxReward (number t) const = 0 |
virtual double | getMinReward (number t) const = 0 |
virtual number | getNumAgents () const = 0 Get the number of agents. |
virtual std::set< std::shared_ptr< State > > | getReachableStates (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, number t) const = 0 Get reachable states. |
virtual double | getReward (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, number t) const = 0 Get the reward at timestep t when executing an action in a specific state. |
virtual std::shared_ptr< Distribution< std::shared_ptr< State > > > | getStartDistribution () const = 0 Get the initial distribution over states. |
virtual std::shared_ptr< Space > | getStateSpace (number t) const = 0 Get the state space at timestep t. |
virtual double | getTransitionProbability (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, const std::shared_ptr< State > & next_state, number t) const = 0 Get the transition probability, i.e. p(s'\|s, a). |
virtual void | setInternalState (std::shared_ptr< State > state) = 0 |
virtual std::tuple< std::shared_ptr< Observation >, std::vector< double >, bool > | step (std::shared_ptr< Action > action) = 0 Do a step on the environment. |
virtual std::tuple< std::shared_ptr< Observation >, std::vector< double >, bool > | step (std::shared_ptr< Action > action, bool increment_timestep) = 0 |
Public Functions inherited from sdm::GymInterface
Type | Name |
---|---|
virtual std::shared_ptr< Space > | getActionSpaceAt (const std::shared_ptr< Observation > & observation, number t) = 0 Get the action space. |
virtual std::shared_ptr< Action > | getRandomAction (const std::shared_ptr< Observation > & observation, number t) = 0 Get random action. |
virtual std::shared_ptr< Observation > | reset () = 0 Reset the environment and return initial observation. |
virtual std::tuple< std::shared_ptr< Observation >, std::vector< double >, bool > | step (std::shared_ptr< Action > action) = 0 Do a step on the environment. |
Protected Attributes inherited from sdm::POMDP
See sdm::POMDP
Type | Name |
---|---|
std::shared_ptr< ObservationDynamicsInterface > | observation_dynamics_ |
std::shared_ptr< Space > | observation_space_ |
Protected Attributes inherited from sdm::MDP
See sdm::MDP
Type | Name |
---|---|
std::shared_ptr< Space > | action_space_ |
Criterion | criterion_ |
int | current_timestep_ |
double | discount_ |
number | horizon_ |
std::shared_ptr< State > | internal_state_ |
number | num_agents_ |
std::shared_ptr< RewardInterface > | reward_space_ |
std::shared_ptr< Distribution< std::shared_ptr< State > > > | start_distribution_ |
std::shared_ptr< StateDynamicsInterface > | state_dynamics_ |
std::shared_ptr< Space > | state_space_ |
Protected Attributes inherited from sdm::MDP
See sdm::MDP
Type | Name |
---|---|
std::shared_ptr< Space > | action_space_ |
Criterion | criterion_ |
int | current_timestep_ |
double | discount_ |
number | horizon_ |
std::shared_ptr< State > | internal_state_ |
number | num_agents_ |
std::shared_ptr< RewardInterface > | reward_space_ |
std::shared_ptr< Distribution< std::shared_ptr< State > > > | start_distribution_ |
std::shared_ptr< StateDynamicsInterface > | state_dynamics_ |
std::shared_ptr< Space > | state_space_ |
Public Functions Documentation
function MPOMDP [1/2]
sdm::MPOMDP::MPOMDP ()
function MPOMDP [2/2]
sdm::MPOMDP::MPOMDP (
const std::shared_ptr< Space > & state_space,
const std::shared_ptr< Space > & action_space,
const std::shared_ptr< Space > & obs_space,
const std::shared_ptr< RewardInterface > & reward,
const std::shared_ptr< StateDynamicsInterface > & state_dynamics,
const std::shared_ptr< ObservationDynamicsInterface > & obs_dynamics,
const std::shared_ptr< Distribution < std::shared_ptr< State >>> & start_distrib,
number horizon=0,
double discount=0.99,
Criterion criterion=Criterion::REW_MAX
)
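For illustration, a hedged construction sketch follows. The concrete implementations behind the Space, RewardInterface, StateDynamicsInterface, ObservationDynamicsInterface and Distribution parameters are assumed to be built elsewhere; the helper function below is hypothetical, and only the constructor call itself follows the signature above.

```cpp
#include <memory>
#include <mpomdp.hpp>  // as listed above

// Hypothetical helper: assembles an MPOMDP from already built components.
std::shared_ptr<sdm::MPOMDP> make_mpomdp(
    const std::shared_ptr<sdm::Space> &state_space,
    const std::shared_ptr<sdm::Space> &action_space,   // joint action space
    const std::shared_ptr<sdm::Space> &obs_space,      // joint observation space
    const std::shared_ptr<sdm::RewardInterface> &reward,
    const std::shared_ptr<sdm::StateDynamicsInterface> &state_dynamics,
    const std::shared_ptr<sdm::ObservationDynamicsInterface> &obs_dynamics,
    const std::shared_ptr<sdm::Distribution<std::shared_ptr<sdm::State>>> &start_distrib)
{
    // Finite horizon of 10 steps, discount 0.99, reward-maximization criterion.
    return std::make_shared<sdm::MPOMDP>(state_space, action_space, obs_space,
                                         reward, state_dynamics, obs_dynamics,
                                         start_distrib,
                                         /* horizon  */ 10,
                                         /* discount */ 0.99,
                                         sdm::Criterion::REW_MAX);
}
```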
function getObservationSpace [1/2]
virtual std::shared_ptr< Space > sdm::MPOMDP::getObservationSpace (
number agent_id,
number t
) const
Parameters:
agent_id
the identifier of the agent
t
the timestep
Returns:
the observation space
Implements sdm::MPOMDPInterface::getObservationSpace
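A usage sketch, assuming a constructed MPOMDP and agents indexed from 0 to getNumAgents()-1: the two-argument overload returns the individual observation space of one agent, while the single-argument overload (documented next) returns the observation space of the whole team.

```cpp
#include <memory>
#include <vector>
#include <mpomdp.hpp>  // as listed above

// Gather the individual observation space of every agent at timestep t.
std::vector<std::shared_ptr<sdm::Space>>
individual_observation_spaces(const std::shared_ptr<sdm::MPOMDP> &mpomdp, sdm::number t)
{
    // Single-argument overload: the (typically joint) observation space.
    std::shared_ptr<sdm::Space> joint_obs_space = mpomdp->getObservationSpace(t);
    (void)joint_obs_space;  // not used further in this sketch

    // Two-argument overload: one space per agent.
    std::vector<std::shared_ptr<sdm::Space>> spaces;
    for (sdm::number agent_id = 0; agent_id < mpomdp->getNumAgents(); agent_id++)
        spaces.push_back(mpomdp->getObservationSpace(agent_id, t));
    return spaces;
}
```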
function getObservationSpace [2/2]
virtual std::shared_ptr< Space > sdm::MPOMDP::getObservationSpace (
number t=0
) const
Parameters:
t
the timestep
Returns:
the observation space
Implements sdm::MPOMDPInterface::getObservationSpace
function toStdFormat
virtual std::string sdm::MPOMDP::toStdFormat ()
Returns:
the process encoded in the standard (.dpomdp, .posg or .zsposg) format
Implements sdm::MMDP::toStdFormat
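A short sketch of how the standard-format encoding might be used, either printed directly or written to disk through the inherited generateFile helper; the file name below is illustrative.

```cpp
#include <iostream>
#include <memory>
#include <mpomdp.hpp>  // as listed above

// Print the problem in the standard text format and also save it to disk.
void export_problem(const std::shared_ptr<sdm::MPOMDP> &mpomdp)
{
    std::cout << mpomdp->toStdFormat() << std::endl;

    // generateFile (inherited from sdm::MDP) chooses the encoding from the
    // file extension (.xml, .json, .dpomdp, .posg, .zsposg).
    mpomdp->generateFile("my_problem.dpomdp");
}
```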
function ~MPOMDP
virtual sdm::MPOMDP::~MPOMDP ()
The documentation for this class was generated from the following file: src/sdm/world/mpomdp.hpp