Class sdm::BaseBeliefMDP

template <class TBelief>

Class List > sdm > BaseBeliefMDP

This class provides a way to transform a POMDP into the belief MDP formalism. More...

#include <belief_mdp.hpp>

Inherits the following classes: sdm::SolvableByMDP, sdm::GymInterface

Public Attributes

Type	Name
int	batch_size_
std::shared_ptr< State >	current_state_ The current state (used in RL).
std::shared_ptr< Graph< std::shared_ptr< State >, Pair< std::shared_ptr< Action >, std::shared_ptr< Observation > > > >	mdp_graph_ The MDP graph (graph of state transitions).
std::shared_ptr< Graph< double, Pair< std::shared_ptr< State >, std::shared_ptr< Action > > > >	reward_graph_
RecursiveMap< TBelief, std::shared_ptr< State > >	state_space_ A pointer on the bag containing all states.
int	step_ The current timestep (used in RL).
bool	store_actions_ = true
bool	store_states_ = true Hyperparameters.
RecursiveMap< std::shared_ptr< State >, std::shared_ptr< Action >, std::shared_ptr< Observation >, double >	transition_probability The transition probability, i.e. p(o | b, a): the probability of receiving observation o after taking action a in belief b.

Public Functions

Type	Name
	BaseBeliefMDP ()
	BaseBeliefMDP (const std::shared_ptr< POMDPInterface > & pomdp, int batch_size=0)
virtual Pair< std::shared_ptr< State >, std::shared_ptr< State > >	computeExactNextState (const std::shared_ptr< State > & belief, const std::shared_ptr< Action > & action, const std::shared_ptr< Observation > & observation, number t=0)
virtual std::shared_ptr< State >	computeNextState (const std::shared_ptr< State > & belief, const std::shared_ptr< Action > & action, const std::shared_ptr< Observation > & observation, number t=0)
virtual Pair< std::shared_ptr< State >, double >	computeNextStateAndProbability (const std::shared_ptr< State > & belief, const std::shared_ptr< Action > & action, const std::shared_ptr< Observation > & observation, number t=0) Compute the state transition in order to return next state and associated probability.
virtual Pair< std::shared_ptr< State >, std::shared_ptr< State > >	computeSampledNextState (const std::shared_ptr< State > & belief, const std::shared_ptr< Action > & action, const std::shared_ptr< Observation > & observation, number t=0)
virtual std::shared_ptr< Space >	getActionSpaceAt (const std::shared_ptr< State > & belief, number t=0) Get the action space at a specific belief and timestep.
virtual std::shared_ptr< Space >	getActionSpaceAt (const std::shared_ptr< Observation > & observation, number t) Get the action space.
virtual double	getExpectedNextValue (const std::shared_ptr< ValueFunction > & value_function, const std::shared_ptr< State > & belief, const std::shared_ptr< Action > & action, number t=0) Get the expected next value.
std::shared_ptr< Graph< std::shared_ptr< State >, Pair< std::shared_ptr< Action >, std::shared_ptr< Observation > > > >	getMDPGraph () Get the MDP graph (graph of state transitions).
virtual Pair< std::shared_ptr< State >, double >	getNextState (const std::shared_ptr< ValueFunction > & value_function, const std::shared_ptr< State > & belief, const std::shared_ptr< Action > & action, const std::shared_ptr< Observation > & observation, number t)
virtual double	getObservationProbability (const std::shared_ptr< State > & belief, const std::shared_ptr< Action > & action, const std::shared_ptr< State > & next_belief, const std::shared_ptr< Observation > & obs, number t=0) const Get the observation probability, i.e. the probability of observation obs given the belief, the action and the next belief.
virtual std::shared_ptr< Space >	getObservationSpaceAt (const std::shared_ptr< State > &, const std::shared_ptr< Action > &, number t)
virtual std::shared_ptr< Action >	getRandomAction (const std::shared_ptr< Observation > & observation, number t) Get random action.
virtual double	getReward (const std::shared_ptr< State > & belief, const std::shared_ptr< Action > & action, number t=0) Get the expected reward of executing a specific action in a specific belief at timestep t.
std::vector< std::shared_ptr< State > >	getStoredStates () const
virtual std::shared_ptr< POMDPInterface >	getUnderlyingPOMDP () const Get the address of the underlying POMDP.
virtual std::shared_ptr< State >	nextBelief (const std::shared_ptr< State > & belief, const std::shared_ptr< Action > & action, const std::shared_ptr< Observation > & observation, number t=0)
virtual Pair< std::shared_ptr< State >, double >	nextBeliefAndProba (const std::shared_ptr< State > & belief, const std::shared_ptr< Action > & action, const std::shared_ptr< Observation > & observation, number t=0) Get the next belief.
virtual std::shared_ptr< State >	nextState (const std::shared_ptr< State > & belief, const std::shared_ptr< Action > & action, number t=0, const std::shared_ptr< HSVI > & hsvi=nullptr) Select the next belief.
virtual std::shared_ptr< Observation >	reset () Reset the environment and return initial observation.
virtual std::tuple< std::shared_ptr< Observation >, std::vector< double >, bool >	step (std::shared_ptr< Action > action) Do a step on the environment.
	~BaseBeliefMDP ()

Public Functions inherited from sdm::SolvableByMDP

See sdm::SolvableByMDP

Type	Name
	SolvableByMDP () Default constructor.
	SolvableByMDP (const std::shared_ptr< MDPInterface > & mdp) Construct a problem solvable by HSVI .
virtual double	do_excess (double incumbent, double lb_value, double ub_value, double cost_so_far, double error, number horizon) Compute the excess of the HSVI paper. It refers to the termination condition.
virtual std::shared_ptr< Space >	getActionSpaceAt (const std::shared_ptr< State > & state, number t=0) Get the action space at a specific state and timestep. The state dependency is required when the game forbids the usage of a number of actions in this state. It is also used in some reformulated problems where actions are decision rules. The time dependency is required in extensive-form games in which some agents have a different action space.
virtual double	getDiscount (number t=0) const Get the specific discount factor for the problem at hand.
virtual double	getExpectedNextValue (const std::shared_ptr< ValueFunction > & value_function, const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, number t=0) Get the expected next value.
virtual std::shared_ptr< State >	getInitialState () Get the initial state.
virtual Pair< std::shared_ptr< State >, double >	getNextState (const std::shared_ptr< ValueFunction > & value_function, const std::shared_ptr< State > & belief, const std::shared_ptr< Action > & action, const std::shared_ptr< Observation > & observation, number t)
virtual std::shared_ptr< Space >	getObservationSpaceAt (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, number t)
virtual double	getReward (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, number t=0) Get the reward of executing a specific action in a specific state at timestep t. The time dependency can be required in non-stationary problems.
virtual const std::shared_ptr< MDPInterface > &	getUnderlyingProblem () const Get the well-defined underlying problem. Some problems are solvable by DP algorithms even if they are not well defined. Usually, they are simply reformulations of an underlying well-defined problem. For instance, the underlying DecPOMDP of the OccupancyMDP or the underlying POMDP of the current BeliefMDP.
virtual double	getWeightedDiscount (number t) Get the specific weighted discount factor for the problem at hand.
virtual bool	isSerialized () const Check if the problem is serialized.
virtual std::shared_ptr< State >	nextState (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, number t=0, const std::shared_ptr< HSVI > & hsvi=nullptr) Select the next state.
virtual Pair< std::shared_ptr< Action >, double >	selectNextAction (const std::shared_ptr< ValueFunction > & lb, const std::shared_ptr< ValueFunction > & ub, const std::shared_ptr< State > & s, number h) Select the next action.
virtual void	setInitialState (const std::shared_ptr< State > & state)

Public Functions inherited from sdm::SolvableByHSVI

See sdm::SolvableByHSVI

Type	Name
virtual double	do_excess (double incumbent, double lb_value, double ub_value, double cost_so_far, double error, number t) = 0 Compute the excess of the HSVI paper. It refers to the termination condition.
virtual std::shared_ptr< Space >	getActionSpaceAt (const std::shared_ptr< State > & state, number t) = 0 Get the action space at a specific state and timestep. The state dependency is required when the game forbids the usage of a number of actions in this state. It is also used in some reformulated problems where actions are decision rules. The time dependency is required in extensive-form games in which some agents have a different action space.
virtual double	getDiscount (number t) const = 0 Get the specific discount factor for the problem at hand.
virtual double	getExpectedNextValue (const std::shared_ptr< ValueFunction > & value_function, const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, number t) = 0 Get the expected next value.
virtual std::shared_ptr< State >	getInitialState () = 0 Get the initial state.
virtual Pair< std::shared_ptr< State >, double >	getNextState (const std::shared_ptr< ValueFunction > & value_function, const std::shared_ptr< State > & belief, const std::shared_ptr< Action > & action, const std::shared_ptr< Observation > & observation, number t) = 0
virtual std::shared_ptr< Space >	getObservationSpaceAt (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, number t) = 0
virtual double	getReward (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, number t) = 0 Get the reward of executing a specific action in a specific state at timestep t. The time dependency can be required in non-stationary problems.
virtual const std::shared_ptr< MDPInterface > &	getUnderlyingProblem () const = 0 Get the well-defined underlying problem. Some problems are solvable by DP algorithms even if they are not well defined. Usually, they are simply reformulations of an underlying well-defined problem. For instance, the underlying DecPOMDP of the OccupancyMDP or the underlying POMDP of the current BeliefMDP.
virtual double	getWeightedDiscount (number t) = 0 Get the specific weighted discount factor for the problem at hand.
virtual bool	isSerialized () const = 0 Check if the problem is serialized.
virtual std::shared_ptr< State >	nextState (const std::shared_ptr< State > & state, const std::shared_ptr< Action > & action, number t=0, const std::shared_ptr< HSVI > & hsvi=nullptr) = 0 Select the next state.
virtual Pair< std::shared_ptr< Action >, double >	selectNextAction (const std::shared_ptr< ValueFunction > & lb, const std::shared_ptr< ValueFunction > & ub, const std::shared_ptr< State > & state, number t) = 0 Select the next action.
virtual void	setInitialState (const std::shared_ptr< State > &) = 0
virtual	~SolvableByHSVI ()

Public Functions inherited from sdm::GymInterface

See sdm::GymInterface

Type	Name
virtual std::shared_ptr< Space >	getActionSpaceAt (const std::shared_ptr< Observation > & observation, number t) = 0 Get the action space.
virtual std::shared_ptr< Action >	getRandomAction (const std::shared_ptr< Observation > & observation, number t) = 0 Get random action.
virtual std::shared_ptr< Observation >	reset () = 0 Reset the environment and return initial observation.
virtual std::tuple< std::shared_ptr< Observation >, std::vector< double >, bool >	step (std::shared_ptr< Action > action) = 0 Do a step on the environment.

Protected Attributes inherited from sdm::SolvableByMDP

See sdm::SolvableByMDP

Type	Name
std::shared_ptr< State >	initial_state_ The initial state.
std::shared_ptr< MDPInterface >	underlying_problem_ The underlying well defined problem.

Protected Functions inherited from sdm::SolvableByMDP

See sdm::SolvableByMDP

Type	Name
const std::shared_ptr< MDPInterface > &	getUnderlyingMDP () const Get the underlying mdp.

Detailed Description

This problem reformulation can be used to solve the underlying POMDP thanks to standard dynamic programming algorithms.

Public Attributes Documentation

variable batch_size_

int sdm::BaseBeliefMDP< TBelief >::batch_size_;

variable current_state_

std::shared_ptr<State> sdm::BaseBeliefMDP< TBelief >::current_state_;

variable mdp_graph_

std::shared_ptr<Graph<std::shared_ptr<State>, Pair<std::shared_ptr<Action>, std::shared_ptr<Observation> > > > sdm::BaseBeliefMDP< TBelief >::mdp_graph_;

variable reward_graph_

std::shared_ptr<Graph<double, Pair<std::shared_ptr<State>, std::shared_ptr<Action> > > > sdm::BaseBeliefMDP< TBelief >::reward_graph_;

variable state_space_

RecursiveMap<TBelief, std::shared_ptr<State> > sdm::BaseBeliefMDP< TBelief >::state_space_;

variable step_

int sdm::BaseBeliefMDP< TBelief >::step_;

variable store_actions_

bool sdm::BaseBeliefMDP< TBelief >::store_actions_;

variable store_states_

bool sdm::BaseBeliefMDP< TBelief >::store_states_;

variable transition_probability

RecursiveMap<std::shared_ptr<State>, std::shared_ptr<Action>, std::shared_ptr<Observation>, double> sdm::BaseBeliefMDP< TBelief >::transition_probability;

Public Functions Documentation

function BaseBeliefMDP [1/2]

sdm::BaseBeliefMDP::BaseBeliefMDP ()

function BaseBeliefMDP [2/2]

sdm::BaseBeliefMDP::BaseBeliefMDP (
    const std::shared_ptr< POMDPInterface > & pomdp,
    int batch_size=0
)

function computeExactNextState

virtual Pair < std::shared_ptr< State >, std::shared_ptr< State > > sdm::BaseBeliefMDP::computeExactNextState (
    const std::shared_ptr< State > & belief,
    const std::shared_ptr< Action > & action,
    const std::shared_ptr< Observation > & observation,
    number t=0
)

function computeNextState

virtual std::shared_ptr< State > sdm::BaseBeliefMDP::computeNextState (
    const std::shared_ptr< State > & belief,
    const std::shared_ptr< Action > & action,
    const std::shared_ptr< Observation > & observation,
    number t=0
)

function computeNextStateAndProbability

virtual Pair < std::shared_ptr< State >, double > sdm::BaseBeliefMDP::computeNextStateAndProbability (
    const std::shared_ptr< State > & belief,
    const std::shared_ptr< Action > & action,
    const std::shared_ptr< Observation > & observation,
    number t=0
)

Parameters:

belief the belief
action the action
observation the observation
t the timestep

Returns:

the couple (next state, transition probability in the next state)

This function can be overridden in an inherited class to define a belief MDP with a different representation of the belief state (e.g. OccupancyMDP inherits from BaseBeliefMDP with TBelief = OccupancyState).

function computeSampledNextState

virtual Pair < std::shared_ptr< State >, std::shared_ptr< State > > sdm::BaseBeliefMDP::computeSampledNextState (
    const std::shared_ptr< State > & belief,
    const std::shared_ptr< Action > & action,
    const std::shared_ptr< Observation > & observation,
    number t=0
)

function getActionSpaceAt [1/2]

virtual std::shared_ptr< Space > sdm::BaseBeliefMDP::getActionSpaceAt (
    const std::shared_ptr< State > & belief,
    number t=0
)

Parameters:

belief the belief
t the timestep

Returns:

the action space

The time dependency is required in extensive-form games in which some agents have a different action space.

Implements sdm::SolvableByHSVI::getActionSpaceAt

function getActionSpaceAt [2/2]

virtual std::shared_ptr< Space > sdm::BaseBeliefMDP::getActionSpaceAt (
    const std::shared_ptr< Observation > & observation,
    number t
)

Parameters:

observation the observation in consideration
t time step

Returns:

the action space.

Implements sdm::GymInterface::getActionSpaceAt

function getExpectedNextValue

virtual double sdm::BaseBeliefMDP::getExpectedNextValue (
    const std::shared_ptr< ValueFunction > & value_function,
    const std::shared_ptr< State > & belief,
    const std::shared_ptr< Action > & action,
    number t=0
)

Parameters:

value_function a pointer on the value function to use to perform the calculus.
belief the belief on which to evaluate the next expected value
action the action
t the timestep

Returns:

double

Implements sdm::SolvableByHSVI::getExpectedNextValue

function getMDPGraph

std::shared_ptr< Graph < std::shared_ptr< State >, Pair < std::shared_ptr< Action >, std::shared_ptr< Observation > > > > sdm::BaseBeliefMDP::getMDPGraph ()

Returns:

std::shared_ptr<Graph<std::shared_ptr<State>, Pair<std::shared_ptr<Action>, std::shared_ptr<Observation>>>>

function getNextState

virtual Pair < std::shared_ptr< State >, double > sdm::BaseBeliefMDP::getNextState (
    const std::shared_ptr< ValueFunction > & value_function,
    const std::shared_ptr< State > & belief,
    const std::shared_ptr< Action > & action,
    const std::shared_ptr< Observation > & observation,
    number t
)

Implements sdm::SolvableByMDP::getNextState

function getObservationProbability

virtual double sdm::BaseBeliefMDP::getObservationProbability (
    const std::shared_ptr< State > & belief,
    const std::shared_ptr< Action > & action,
    const std::shared_ptr< State > & next_belief,
    const std::shared_ptr< Observation > & obs,
    number t=0
) const

function getObservationSpaceAt

virtual std::shared_ptr< Space > sdm::BaseBeliefMDP::getObservationSpaceAt (
    const std::shared_ptr< State > &,
    const std::shared_ptr< Action > &,
    number t
)

Implements sdm::SolvableByMDP::getObservationSpaceAt

function getRandomAction

virtual std::shared_ptr< Action > sdm::BaseBeliefMDP::getRandomAction (
    const std::shared_ptr< Observation > & observation,
    number t
)

Parameters:

observation the observation in consideration.
t time step.

Returns:

the random action.

Implements sdm::GymInterface::getRandomAction

function getReward

virtual double sdm::BaseBeliefMDP::getReward (
    const std::shared_ptr< State > & belief,
    const std::shared_ptr< Action > & action,
    number t=0
)

Parameters:

belief the belief
action the action
t the timestep

Returns:

the reward

The time dependency can be required in non-stationary problems.

Implements sdm::SolvableByHSVI::getReward

function getStoredStates

std::vector< std::shared_ptr< State > > sdm::BaseBeliefMDP::getStoredStates () const

function getUnderlyingPOMDP

virtual std::shared_ptr< POMDPInterface > sdm::BaseBeliefMDP::getUnderlyingPOMDP () const

function nextBelief

virtual std::shared_ptr< State > sdm::BaseBeliefMDP::nextBelief (
    const std::shared_ptr< State > & belief,
    const std::shared_ptr< Action > & action,
    const std::shared_ptr< Observation > & observation,
    number t=0
)

function nextBeliefAndProba

virtual Pair < std::shared_ptr< State >, double > sdm::BaseBeliefMDP::nextBeliefAndProba (
    const std::shared_ptr< State > & belief,
    const std::shared_ptr< Action > & action,
    const std::shared_ptr< Observation > & observation,
    number t=0
)

Parameters:

belief the belief
action the action
observation the observation
t the timestep

Returns:

the next belief

This function returns the next belief. To do so, we check in the MDP graph for the existence of an edge (action / observation) starting from the current belief. If it exists, we return the associated next belief. Otherwise, we compute the next belief using the "computeNextStateAndProbability" function and add the edge from the current belief to the next belief in the graph.

function nextState

virtual std::shared_ptr< State > sdm::BaseBeliefMDP::nextState (
    const std::shared_ptr< State > & belief,
    const std::shared_ptr< Action > & action,
    number t=0,
    const std::shared_ptr< HSVI > & hsvi=nullptr
)

Parameters:

belief the current belief
action the action
t the timestep
hsvi a pointer on the algorithm that makes the call

Returns:

the next state

Implements sdm::SolvableByMDP::nextState

function reset

virtual std::shared_ptr< Observation > sdm::BaseBeliefMDP::reset ()

Returns:

the initial observation

Implements sdm::GymInterface::reset

function step

virtual std::tuple< std::shared_ptr< Observation >, std::vector< double >, bool > sdm::BaseBeliefMDP::step (
    std::shared_ptr< Action > action
)

Parameters:

action the action to execute

Returns:

the information produced. It includes: next observation, rewards, episode done

Implements sdm::GymInterface::step

function ~BaseBeliefMDP

sdm::BaseBeliefMDP::~BaseBeliefMDP ()

The documentation for this class was generated from the following file: src/sdm/world/belief_mdp.hpp