Class sdm::QLearning

template <class TInput>

Class List > sdm > QLearning

Q-Learning and its extensions (DQN, etc).

  • #include <q_learning.hpp>

Inherits the following classes: sdm::Algorithm

Public Functions

Type Name
QLearning (std::shared_ptr< GymInterface > & env, std::shared_ptr< ExperienceMemoryInterface > experience_memory, std::shared_ptr< QValueFunction< TInput >> q_value_table, std::shared_ptr< QValueFunction< TInput >> q_value_table_target, std::shared_ptr< QValueBackupInterface > backup, std::shared_ptr< EpsGreedy > exploration, number horizon, double discount=0.9, double lr=0.001, double smooth=0.99, unsigned long num_episodes=10000, std::string name="qlearning")
void do_episode ()
Execute an episode.
virtual void do_initialize ()
Initialize the algorithm. Initialize the Q-Value function.
virtual void do_save ()
Save the value function.
virtual void do_solve ()
Learning procedure. Will attempt to solve the problem.
void do_step ()
Execute a step.
virtual void do_test ()
Test the result of a problem.
virtual double getResult ()
double getResultOpti ()
virtual int getTrial ()
void initLogger ()
std::shared_ptr< Action > select_action (const std::shared_ptr< Observation > & observation, number t)
std::shared_ptr< Action > select_greedy_action (const std::shared_ptr< Observation > & observation, number t)
void update_model ()
Update the q-value functions based on the memory/experience.
void update_target ()
Update the target model.

Public Functions inherited from sdm::Algorithm

See sdm::Algorithm

Type Name
virtual void do_initialize () = 0
Initialize the algorithm.
virtual void do_save () = 0
Save the policy in a file.
virtual void do_solve () = 0
Solve the problem.
virtual void do_test () = 0
Test the result of the algorithm.
virtual double getResult () = 0
virtual int getTrial () = 0
virtual ~Algorithm ()

Protected Attributes

Type Name
double E_R
double R
std::shared_ptr< QValueBackupInterface > backup_
double discount_
std::shared_ptr< GymInterface > env_
The problem to be solved.
unsigned long episode
std::shared_ptr< ExperienceMemoryInterface > experience_memory_
The experience memory.
std::shared_ptr< EpsGreedy > exploration_process
The exploration process.
unsigned long global_step
number horizon_
Some hyperparameters for the algorithm.
std::shared_ptr< MultiLogger > logger_
The logger.
double lr_
std::string name_ = "qlearning"
unsigned long num_episodes_
std::shared_ptr< QValueFunction< TInput > > q_value_table_
Q-value function.
std::shared_ptr< QValueFunction< TInput > > q_value_table_target_
Q-value target function.
std::vector< double > rewards_
double smooth_
number step

Public Functions Documentation

function QLearning

sdm::QLearning::QLearning (
    std::shared_ptr< GymInterface > & env,
    std::shared_ptr< ExperienceMemoryInterface > experience_memory,
    std::shared_ptr< QValueFunction < TInput >> q_value_table,
    std::shared_ptr< QValueFunction < TInput >> q_value_table_target,
    std::shared_ptr< QValueBackupInterface > backup,
    std::shared_ptr< EpsGreedy > exploration,
    number horizon,
    double discount=0.9,
    double lr=0.001,
    double smooth=0.99,
    unsigned long num_episodes=10000,
    std::string name="qlearning"
) 

function do_episode

void sdm::QLearning::do_episode () 

function do_initialize

virtual void sdm::QLearning::do_initialize () 

Implements sdm::Algorithm::do_initialize

function do_save

virtual void sdm::QLearning::do_save () 

Implements sdm::Algorithm::do_save

function do_solve

virtual void sdm::QLearning::do_solve () 

Implements sdm::Algorithm::do_solve

function do_step

void sdm::QLearning::do_step () 

function do_test

virtual void sdm::QLearning::do_test () 

Implements sdm::Algorithm::do_test

function getResult

inline virtual double sdm::QLearning::getResult () 

Implements sdm::Algorithm::getResult

function getResultOpti

inline double sdm::QLearning::getResultOpti () 

function getTrial

inline virtual int sdm::QLearning::getTrial () 

Implements sdm::Algorithm::getTrial

function initLogger

void sdm::QLearning::initLogger () 

function select_action

std::shared_ptr< Action > sdm::QLearning::select_action (
    const std::shared_ptr< Observation > & observation,
    number t
) 

function select_greedy_action

std::shared_ptr< Action > sdm::QLearning::select_greedy_action (
    const std::shared_ptr< Observation > & observation,
    number t
) 

function update_model

void sdm::QLearning::update_model () 

function update_target

void sdm::QLearning::update_target () 

Protected Attributes Documentation

variable E_R

double sdm::QLearning< TInput >::E_R;

variable R

double sdm::QLearning< TInput >::R;

variable backup_

std::shared_ptr<QValueBackupInterface> sdm::QLearning< TInput >::backup_;

variable discount_

double sdm::QLearning< TInput >::discount_;

variable env_

std::shared_ptr<GymInterface> sdm::QLearning< TInput >::env_;

variable episode

unsigned long sdm::QLearning< TInput >::episode;

variable experience_memory_

std::shared_ptr<ExperienceMemoryInterface> sdm::QLearning< TInput >::experience_memory_;

variable exploration_process

std::shared_ptr<EpsGreedy> sdm::QLearning< TInput >::exploration_process;

variable global_step

unsigned long sdm::QLearning< TInput >::global_step;

variable horizon_

number sdm::QLearning< TInput >::horizon_;

variable logger_

std::shared_ptr<MultiLogger> sdm::QLearning< TInput >::logger_;

variable lr_

double sdm::QLearning< TInput >::lr_;

variable name_

std::string sdm::QLearning< TInput >::name_;

variable num_episodes_

unsigned long sdm::QLearning< TInput >::num_episodes_;

variable q_value_table_

std::shared_ptr<QValueFunction<TInput> > sdm::QLearning< TInput >::q_value_table_;

variable q_value_table_target_

std::shared_ptr<QValueFunction<TInput> > sdm::QLearning< TInput >::q_value_table_target_;

variable rewards_

std::vector<double> sdm::QLearning< TInput >::rewards_;

variable smooth_

double sdm::QLearning< TInput >::smooth_;

variable step

number sdm::QLearning< TInput >::step;

The documentation for this class was generated from the following file: src/sdm/algorithms/q_learning.hpp