Class sdm::HierarchicalMPOMDP

Class List > sdm > HierarchicalMPOMDP

The Hierarchical MPOMDP is a transformation of a standard MPOMDP assuming there exists a hierarchy among agents. More...

#include <hierarchical_mpomdp.hpp>

Inherits the following classes: sdm::TransformedMPOMDP

Public Functions

Type	Name
	HierarchicalMPOMDP (const std::shared_ptr< MPOMDPInterface > & mpomdp)
std::shared_ptr< Observation >	getClassicObservation (const std::shared_ptr< Observation > & hierarchical_joint_observation) const Get the classic joint observation corresponding to a hierarchical joint observation.
std::shared_ptr< Observation >	getHierarchicalObservation (const std::shared_ptr< Observation > & classic_joint_observation) const
virtual double	getObservationProbability (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, const std::shared_ptr< State > & next_state, const std::shared_ptr< Observation > & observation, number t) const Get the observation probability.
virtual std::shared_ptr< Space >	getObservationSpace (number t) const Get the observation space at timestep t.
virtual std::shared_ptr< Space >	getObservationSpace (number agent_id, number t) const Get the observation space of agent i at timestep t.
virtual std::set< std::shared_ptr< Observation > >	getReachableObservations (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, const std::shared_ptr< State > & next_state, number t) const Get the reachable observations.
virtual std::tuple< std::shared_ptr< Observation >, std::vector< double >, bool >	step (std::shared_ptr< Action > action) Do a step on the environment.
virtual std::tuple< std::shared_ptr< Observation >, std::vector< double >, bool >	step (std::shared_ptr< Action > action, bool increment_timestep)

Public Functions inherited from sdm::TransformedMPOMDP

See sdm::TransformedMPOMDP

Type	Name
	TransformedMPOMDP (const std::shared_ptr< MPOMDPInterface > & mpomdp)
virtual std::shared_ptr< Space >	getActionSpace (number agent_id, number t) const Get the action space of agent i at timestep t.
virtual std::shared_ptr< Space >	getActionSpace (number t) const Get the action space at timestep t.
virtual std::shared_ptr< Space >	getActionSpaceAt (const std::shared_ptr< Observation > & observation, number t) Get the action space.
virtual double	getDiscount (number t) const Get the discount factor at timestep t.
virtual double	getDynamics (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, const std::shared_ptr< State > & next_state, const std::shared_ptr< Observation > & observation, number t) const Get the dynamics, i.e. p(s', o | s, a).
virtual number	getHorizon () const Get the horizon.
virtual std::shared_ptr< State >	getInternalState () const
virtual double	getMaxReward (number t) const
virtual double	getMinReward (number t) const
virtual number	getNumAgents () const Get the number of agents.
virtual double	getObservationProbability (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, const std::shared_ptr< State > & next_state, const std::shared_ptr< Observation > & observation, number t) const Get the observation probability, i.e. p(o | s, a, s').
virtual std::shared_ptr< Space >	getObservationSpace (number agent_id, number t) const Get the observation space of agent i at timestep t.
virtual std::shared_ptr< Space >	getObservationSpace (number t) const Get the observation space at timestep t.
virtual std::shared_ptr< Action >	getRandomAction (const std::shared_ptr< Observation > & observation, number t) Get random action.
virtual std::set< std::shared_ptr< Observation > >	getReachableObservations (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, const std::shared_ptr< State > & next_state, number t) const Get reachable observations.
virtual std::set< std::shared_ptr< State > >	getReachableStates (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, number t) const Get reachable states.
virtual double	getReward (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, number t) const Get the reward at timestep t when executing an action in a specific state.
virtual std::shared_ptr< Distribution< std::shared_ptr< State > > >	getStartDistribution () const Get the initial distribution over states.
virtual std::shared_ptr< Space >	getStateSpace (number t) const Get the state space at timestep t.
virtual double	getTransitionProbability (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, const std::shared_ptr< State > & next_state, number t) const Get the transition probability, i.e. p(s' | s, a).
virtual std::shared_ptr< Observation >	reset () Reset the environment and return initial observation.
virtual void	setInternalState (std::shared_ptr< State > state)
virtual std::tuple< std::shared_ptr< Observation >, std::vector< double >, bool >	step (std::shared_ptr< Action > action) Do a step on the environment.
virtual std::tuple< std::shared_ptr< Observation >, std::vector< double >, bool >	step (std::shared_ptr< Action > action, bool increment_timestep)
virtual	~TransformedMPOMDP ()

Public Functions inherited from sdm::MPOMDPInterface

See sdm::MPOMDPInterface

Type	Name
virtual std::shared_ptr< Space >	getObservationSpace (number agent_id, number t) const = 0 Get the observation space of agent i at timestep t.
virtual std::shared_ptr< Space >	getObservationSpace (number t) const = 0 Get the observation space at timestep t.

Public Functions inherited from sdm::MMDPInterface

See sdm::MMDPInterface

Type	Name
virtual std::shared_ptr< Space >	getActionSpace (number agent_id, number t) const = 0 Get the action space of agent i at timestep t.
virtual std::shared_ptr< Space >	getActionSpace (number t) const = 0 Get the action space at timestep t.

Public Functions inherited from sdm::MDPInterface

See sdm::MDPInterface

Type	Name
virtual std::shared_ptr< Space >	getActionSpace (number t) const = 0 Get the action space at timestep t.
virtual double	getDiscount (number t) const = 0 Get the discount factor at timestep t.
virtual number	getHorizon () const = 0 Get the horizon.
virtual std::shared_ptr< State >	getInternalState () const = 0
virtual double	getMaxReward (number t) const = 0
virtual double	getMinReward (number t) const = 0
virtual number	getNumAgents () const = 0 Get the number of agents.
virtual std::set< std::shared_ptr< State > >	getReachableStates (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, number t) const = 0 Get reachable states.
virtual double	getReward (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, number t) const = 0 Get the reward at timestep t when executing an action in a specific state.
virtual std::shared_ptr< Distribution< std::shared_ptr< State > > >	getStartDistribution () const = 0 Get the initial distribution over states.
virtual std::shared_ptr< Space >	getStateSpace (number t) const = 0 Get the state space at timestep t.
virtual double	getTransitionProbability (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, const std::shared_ptr< State > & next_state, number t) const = 0 Get the transition probability, i.e. p(s' | s, a).
virtual void	setInternalState (std::shared_ptr< State > state) = 0
virtual std::tuple< std::shared_ptr< Observation >, std::vector< double >, bool >	step (std::shared_ptr< Action > action) = 0 Do a step on the environment.
virtual std::tuple< std::shared_ptr< Observation >, std::vector< double >, bool >	step (std::shared_ptr< Action > action, bool increment_timestep) = 0

Public Functions inherited from sdm::GymInterface

See sdm::GymInterface

Type	Name
virtual std::shared_ptr< Space >	getActionSpaceAt (const std::shared_ptr< Observation > & observation, number t) = 0 Get the action space.
virtual std::shared_ptr< Action >	getRandomAction (const std::shared_ptr< Observation > & observation, number t) = 0 Get random action.
virtual std::shared_ptr< Observation >	reset () = 0 Reset the environment and return initial observation.
virtual std::tuple< std::shared_ptr< Observation >, std::vector< double >, bool >	step (std::shared_ptr< Action > action) = 0 Do a step on the environment.

Public Functions inherited from sdm::POMDPInterface

See sdm::POMDPInterface

Type	Name
virtual double	getDynamics (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, const std::shared_ptr< State > & next_state, const std::shared_ptr< Observation > & observation, number t) const = 0 Get the dynamics, i.e. p(s', o | s, a).
virtual double	getObservationProbability (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, const std::shared_ptr< State > & next_state, const std::shared_ptr< Observation > & observation, number t) const = 0 Get the observation probability, i.e. p(o | s, a, s').
virtual std::shared_ptr< Space >	getObservationSpace (number t) const = 0 Get the observation space at timestep t.
virtual std::set< std::shared_ptr< Observation > >	getReachableObservations (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, const std::shared_ptr< State > & next_state, number t) const = 0 Get reachable observations.

Public Functions inherited from sdm::MDPInterface

See sdm::MDPInterface

Type	Name
virtual std::shared_ptr< Space >	getActionSpace (number t) const = 0 Get the action space at timestep t.
virtual double	getDiscount (number t) const = 0 Get the discount factor at timestep t.
virtual number	getHorizon () const = 0 Get the horizon.
virtual std::shared_ptr< State >	getInternalState () const = 0
virtual double	getMaxReward (number t) const = 0
virtual double	getMinReward (number t) const = 0
virtual number	getNumAgents () const = 0 Get the number of agents.
virtual std::set< std::shared_ptr< State > >	getReachableStates (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, number t) const = 0 Get reachable states.
virtual double	getReward (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, number t) const = 0 Get the reward at timestep t when executing an action in a specific state.
virtual std::shared_ptr< Distribution< std::shared_ptr< State > > >	getStartDistribution () const = 0 Get the initial distribution over states.
virtual std::shared_ptr< Space >	getStateSpace (number t) const = 0 Get the state space at timestep t.
virtual double	getTransitionProbability (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, const std::shared_ptr< State > & next_state, number t) const = 0 Get the transition probability, i.e. p(s' | s, a).
virtual void	setInternalState (std::shared_ptr< State > state) = 0
virtual std::tuple< std::shared_ptr< Observation >, std::vector< double >, bool >	step (std::shared_ptr< Action > action) = 0 Do a step on the environment.
virtual std::tuple< std::shared_ptr< Observation >, std::vector< double >, bool >	step (std::shared_ptr< Action > action, bool increment_timestep) = 0

Public Functions inherited from sdm::GymInterface

See sdm::GymInterface

Type	Name
virtual std::shared_ptr< Space >	getActionSpaceAt (const std::shared_ptr< Observation > & observation, number t) = 0 Get the action space.
virtual std::shared_ptr< Action >	getRandomAction (const std::shared_ptr< Observation > & observation, number t) = 0 Get random action.
virtual std::shared_ptr< Observation >	reset () = 0 Reset the environment and return initial observation.
virtual std::tuple< std::shared_ptr< Observation >, std::vector< double >, bool >	step (std::shared_ptr< Action > action) = 0 Do a step on the environment.

Protected Types

Type	Name
typedef boost::bimaps::bimap< std::shared_ptr< Observation >, std::shared_ptr< Observation > >	obs_bimap
typedef obs_bimap::value_type	obs_bimap_value

Protected Attributes

Type	Name
obs_bimap	bimap_classic_to_hierarchical_obs The mapping from joint observation to hierarchical observation.
Joint< std::shared_ptr< DiscreteSpace > >	indiv_observation_spaces_ Keep transformed individual observation space for each agent.
std::shared_ptr< DiscreteSpace >	joint_observation_space_ The transformed joint observation space.

Protected Attributes inherited from sdm::TransformedMPOMDP

See sdm::TransformedMPOMDP

Type	Name
std::shared_ptr< MPOMDPInterface >	mpomdp_ The underlying MPOMDP.

Protected Functions

Type	Name
void	setupObservationSpace () This function will transform the observation space of the MPOMDP as if it was a hierarchical problem.

Detailed Description

More precisely, each agent receives the observations of its subordinates in addition to its own observation.

Example - tiger problem :

(hear-left, hear-left) > ((hear-left), (hear-left, hear-left))
(hear-left, hear-right) > ((hear-left), (hear-left, hear-right))
(hear-right, hear-left) > ((hear-right), (hear-right, hear-left))
(hear-right, hear-right) > ((hear-right), (hear-right, hear-right))

Public Functions Documentation

function HierarchicalMPOMDP

sdm::HierarchicalMPOMDP::HierarchicalMPOMDP (
    const std::shared_ptr< MPOMDPInterface > & mpomdp
)

function getClassicObservation

std::shared_ptr< Observation > sdm::HierarchicalMPOMDP::getClassicObservation (
    const std::shared_ptr< Observation > & hierarchical_joint_observation
) const

Parameters:

hierarchical_joint_observation the hierarchical joint observation

Returns:

the corresponding classic joint observation

function getHierarchicalObservation

std::shared_ptr< Observation > sdm::HierarchicalMPOMDP::getHierarchicalObservation (
    const std::shared_ptr< Observation > & classic_joint_observation
) const

function getObservationProbability

virtual double sdm::HierarchicalMPOMDP::getObservationProbability (
    const std::shared_ptr< State > & state,
    const std::shared_ptr< Action > & action,
    const std::shared_ptr< State > & next_state,
    const std::shared_ptr< Observation > & observation,
    number t
) const

Parameters:

state
action
next_state
observation
t

Returns:

double

Implements sdm::POMDPInterface::getObservationProbability

function getObservationSpace [1/2]

virtual std::shared_ptr< Space > sdm::HierarchicalMPOMDP::getObservationSpace (
    number t
) const

Parameters:

t the timestep

Returns:

the observation space

Implements sdm::MPOMDPInterface::getObservationSpace

function getObservationSpace [2/2]

virtual std::shared_ptr< Space > sdm::HierarchicalMPOMDP::getObservationSpace (
    number agent_id,
    number t
) const

Parameters:

agent_id the identifier of the agent
t the timestep

Returns:

the observation space

Implements sdm::MPOMDPInterface::getObservationSpace

function getReachableObservations

virtual std::set< std::shared_ptr< Observation > > sdm::HierarchicalMPOMDP::getReachableObservations (
    const std::shared_ptr< State > & state,
    const std::shared_ptr< Action > & action,
    const std::shared_ptr< State > & next_state,
    number t
) const

Parameters:

state
action
next_state
t

Returns:

std::set<std::shared_ptr<Observation>>

Implements sdm::POMDPInterface::getReachableObservations

function step [1/2]

virtual std::tuple< std::shared_ptr< Observation >, std::vector< double >, bool > sdm::HierarchicalMPOMDP::step (
    std::shared_ptr< Action > action
)

Parameters:

action the action to execute

Returns:

the information produced. Include : next observation, rewards, episode done

Implements sdm::MDPInterface::step

function step [2/2]

virtual std::tuple< std::shared_ptr< Observation >, std::vector< double >, bool > sdm::HierarchicalMPOMDP::step (
    std::shared_ptr< Action > action,
    bool increment_timestep
)

Implements sdm::MDPInterface::step

Protected Types Documentation

typedef obs_bimap

using sdm::HierarchicalMPOMDP::obs_bimap =  boost::bimaps::bimap<std::shared_ptr<Observation>, std::shared_ptr<Observation> >;

typedef obs_bimap_value

using sdm::HierarchicalMPOMDP::obs_bimap_value =  obs_bimap::value_type;

Protected Attributes Documentation

variable bimap_classic_to_hierarchical_obs

obs_bimap sdm::HierarchicalMPOMDP::bimap_classic_to_hierarchical_obs;

variable indiv_observation_spaces_

Joint<std::shared_ptr<DiscreteSpace> > sdm::HierarchicalMPOMDP::indiv_observation_spaces_;

variable joint_observation_space_

std::shared_ptr<DiscreteSpace> sdm::HierarchicalMPOMDP::joint_observation_space_;

Protected Functions Documentation

function setupObservationSpace

void sdm::HierarchicalMPOMDP::setupObservationSpace ()

The documentation for this class was generated from the following file: src/sdm/world/hierarchical_mpomdp.hpp