Commit

Merge pull request #60 from ZimmermanGroup/curriculum-fix
Curriculum Fix
runxuanjiang authored Jun 5, 2022
2 parents f1fb3d4 + f337274 commit 4a3ac28
Showing 30 changed files with 509 additions and 115 deletions.
1 change: 1 addition & 0 deletions docs/source/agents/agents.rst
@@ -6,4 +6,5 @@ Agents
:caption: Agents

prebuilt
+ curriculum_agents
base
4 changes: 4 additions & 0 deletions docs/source/agents/curriculum_agents.rst
@@ -0,0 +1,4 @@
.. automodule:: conformer_rl.agents.curriculum_agents
:members:
:show-inheritance:
:inherited-members:
2 changes: 1 addition & 1 deletion docs/source/conf.py
@@ -24,7 +24,7 @@
author = 'Runxuan Jiang'

# The full version, including alpha/beta/rc tags
- release = '1.0.0'
+ release = '1.1.0'


# -- General configuration ---------------------------------------------------
3 changes: 3 additions & 0 deletions docs/source/environments/curriculum_conformer_env.rst
@@ -0,0 +1,3 @@
.. automodule:: conformer_rl.environments.curriculum_conformer_env
:members:
:show-inheritance:
1 change: 1 addition & 0 deletions docs/source/environments/environments.rst
@@ -7,5 +7,6 @@ Environments

prebuilt_environments
conformer_env
+ curriculum_conformer_env
components/environment_components
environment_wrapper
1 change: 1 addition & 0 deletions docs/source/index.rst
@@ -17,6 +17,7 @@ Introduction
tutorial/model_tuning
tutorial/customizing_env_1
tutorial/customizing_env_2
+ tutorial/curriculum

.. toctree::
:caption: API Reference
72 changes: 72 additions & 0 deletions docs/source/tutorial/curriculum.rst
@@ -0,0 +1,72 @@
Utilizing Curriculum Learning
=============================
This section walks through how to train an agent using curriculum learning.

What is Curriculum Learning?
----------------------------
Curriculum learning can be viewed as applying transfer learning iteratively. To train an agent on a specific task, the agent is first trained on a similar but easier task. Once the agent has learned that task, it is trained on a slightly more difficult task, and this continues until the agent is trained directly on the original task.

Empirical results have shown that with curriculum learning, an agent can learn difficult tasks that it cannot learn by training on the target task directly. Even when an agent can learn a task directly, curriculum learning often makes the training process more efficient by reducing the required training time.
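
This iterative structure can be summarized in pseudocode (a schematic sketch of the idea only, not the :mod:`conformer_rl` API; ``tasks``, ``success_criterion_met``, and ``train_one_iteration`` are hypothetical placeholders)::

    # tasks are ordered from easiest to hardest,
    # e.g. branched alkanes of increasing size
    for task in tasks:
        while not success_criterion_met(agent, task):
            train_one_iteration(agent, task)
    # training finishes directly on the original (hardest) task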

:mod:`conformer_rl` contains implementations of mixin classes that can make any of the included environments and agents compatible with curriculum learning.
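
For instance, a curriculum-compatible version of a custom environment could be composed along these lines (a sketch under assumptions: the base-class name ``CurriculumConformerEnv`` is taken from the API reference module added in this commit, while ``MyRewardMixin`` and the composition order shown here are hypothetical)::

    from conformer_rl.environments.curriculum_conformer_env import CurriculumConformerEnv

    class MyRewardMixin:
        """Hypothetical stand-in for a custom reward component."""

    class MyCurriculumEnv(MyRewardMixin, CurriculumConformerEnv):
        """Reward behavior from the mixin; curriculum behavior from the base class."""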

Curriculum Learning Example Training Script
-------------------------------------------
The full code for this example can be found in `examples/curriculum_example.py <https://github.com/ZimmermanGroup/conformer-rl/blob/master/examples/curriculum_example.py>`_.

In this example, we want to train an agent to generate conformers for a branched alkane molecule with 16 carbon atoms. However, instead of training directly on this molecule, we will utilize a curriculum where the agent begins by training on a branched alkane with 8 carbon atoms and then iteratively moves up to a branched alkane with 15 carbon atoms.

We first generate the :class:`~conformer_rl.config.mol_config.MolConfig` objects for the training and evaluation environments. For the training environment, we want a list of :class:`~conformer_rl.config.mol_config.MolConfig` objects starting with a branched alkane with 8 carbon atoms, up to a branched alkane with 15 carbon atoms::

# Create mol_configs for the curriculum
mol_configs = [config_from_rdkit(generate_branched_alkane(i), num_conformers=200, calc_normalizers=True) for i in range(8, 16)]

Next, we create a mol_config for the evaluation environment. Note that the evaluation environment will not be a curriculum environment, since we are only evaluating the agent on a single molecule::

eval_mol_config = config_from_rdkit(generate_branched_alkane(16), num_conformers=200, calc_normalizers=True)

Next, we will set up the :class:`~conformer_rl.config.agent_config.Config` object for the agent and its hyperparameters, as we have done in the previous sections::

config = Config()
config.tag = 'curriculum_test'
config.network = RTGNRecurrent(6, 128, edge_dim=6, node_dim=5).to(device)

# Batch Hyperparameters
config.max_steps = 100000

# Training Hyperparameters
lr = 5e-6 * np.sqrt(10)
config.optimizer_fn = lambda params: torch.optim.Adam(params, lr=lr, eps=1e-5)


We will now create the environments for training and evaluation. :mod:`conformer_rl` already includes pre-built environments for curriculum learning. We will use the :class:`~conformer_rl.environments.environments.GibbsScorePruningCurriculumEnv` environment, which is identical to the :class:`~conformer_rl.environments.environments.GibbsScorePruningEnv` used previously except that it is compatible with curriculum learning. We will set the evaluation environment to :class:`~conformer_rl.environments.environments.GibbsScorePruningEnv`::

# Task Settings
config.train_env = Task('GibbsScorePruningCurriculumEnv-v0', concurrency=True, num_envs=10, seed=np.random.randint(0,1e5), mol_configs=mol_configs)
config.eval_env = Task('GibbsScorePruningEnv-v0', seed=np.random.randint(0,7e4), mol_config=eval_mol_config)
config.eval_interval = 20000

Next, we need to specify hyperparameters specific to the curriculum. The meaning of each hyperparameter is discussed in :ref:`Curriculum-Supported Agents` and :ref:`Curriculum Conformer_env`::

# Curriculum Hyperparameters
config.curriculum_agent_buffer_len = 20
config.curriculum_agent_reward_thresh = 0.7
config.curriculum_agent_success_rate = 0.7
config.curriculum_agent_fail_rate = 0.2
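
To make the roles of these hyperparameters concrete, the curriculum-advancement rule can be pictured roughly as follows (an illustrative sketch of the mechanism only, not the actual :mod:`conformer_rl` implementation; ``rewards`` and ``level`` are hypothetical names, and the authoritative behavior is described in :ref:`Curriculum-Supported Agents`)::

    # rewards: the most recent evaluation rewards, holding at most
    # config.curriculum_agent_buffer_len entries
    rewards = [0.5, 0.8, 0.9, 0.6, 0.75]
    level = 0  # index of the current molecule in the curriculum

    successes = [r >= config.curriculum_agent_reward_thresh for r in rewards]
    success_fraction = sum(successes) / len(successes)

    if success_fraction >= config.curriculum_agent_success_rate:
        level += 1  # advance to a harder molecule
    elif success_fraction <= config.curriculum_agent_fail_rate:
        level -= 1  # fall back to an easier molecule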

Finally, we initialize our agent. Each of the pre-built agents in :mod:`conformer_rl` has a curriculum version as well. In this example we will use :class:`~conformer_rl.agents.curriculum_agents.PPORecurrentExternalCurriculumAgent`::

agent = PPORecurrentExternalCurriculumAgent(config)
agent.run_steps()

We can now run the script to train the agent.

For more information on how the curriculum environments and agents work, see the sections :ref:`Curriculum Conformer_env` and :ref:`Curriculum-Supported Agents`.


4 changes: 2 additions & 2 deletions docs/source/tutorial/customizing_env_1.rst
@@ -74,7 +74,7 @@ we must initialize the neural network with the correct ``node_dim``. In :ref:`Ge
Finally, when setting the ``train_env`` and ``eval_env``, we must specify the name of the environment to be the ``'TestEnv-v0'`` we registered::
# Set the environment to the test env
- config.train_env = Task('TestEnv-v0', concurrency=True, num_envs=5, seed=np.random.randint(0,1e5), mol_config=mol_config, max_steps=200)
- config.eval_env = Task('TestEnv-v0', seed=np.random.randint(0,7e4), mol_config=mol_config, max_steps=200)
+ config.train_env = Task('TestEnv-v0', concurrency=True, num_envs=5, seed=np.random.randint(0,1e5), mol_config=mol_config)
+ config.eval_env = Task('TestEnv-v0', seed=np.random.randint(0,7e4), mol_config=mol_config)

14 changes: 7 additions & 7 deletions docs/source/tutorial/getting_started.rst
@@ -26,9 +26,9 @@ Suppose we want to generate conformers for a branched alkane molecule with 14 ca

Next, we can use the function :func:`~conformer_rl.molecule_generation.generate_molecule_config.config_from_rdkit`::

- mol_config = config_from_rdkit(mol, calc_normalizers=True, save_file='alkane')
+ mol_config = config_from_rdkit(mol, num_conformers=200, calc_normalizers=True, save_file='alkane')

- which will create a :class:`~conformer_rl.config.mol_config.MolConfig` for our branched alkane. By setting ``calc_normalizeres=True``, the function will calculate the normalizing constants which will be later used by the environment for calculating rewards. The ``ep_steps`` parameter specifies the number of conformers we want to generate in each environment episode and is used for calculating the normalizing constants. We also set ``save_file='alkane'``, so that the generated :class:`~conformer_rl.config.mol_config.MolConfig` object is dumped as a binary `Pickle <https://docs.python.org/3/library/pickle.html>`_ file named ``alkane.pkl``, so that it can be reused later.
+ which will create a :class:`~conformer_rl.config.mol_config.MolConfig` for our branched alkane. The ``num_conformers`` parameter specifies the number of conformers we want to generate in each environment episode, in this case 200. By setting ``calc_normalizers=True``, the function will calculate the normalizing constants that will later be used by the environment for calculating rewards. We also set ``save_file='alkane'`` so that the generated :class:`~conformer_rl.config.mol_config.MolConfig` object is dumped as a binary `Pickle <https://docs.python.org/3/library/pickle.html>`_ file named ``alkane.pkl``, allowing it to be reused later.

There are two main benefits to saving the generated :class:`~conformer_rl.config.mol_config.MolConfig` object. Firstly, the normalizing constants generated by setting ``calc_normalizers=True`` are not deterministic and rely on rdkit's conformer generation functionality (which uses random initialization). As discussed above, if we wish to compare the performance of two separate models on the same environment, the same set of normalizing constants should be used for both models, rather than generating a new set. Secondly, generating normalizing constants can be time-consuming for large molecules, so there is no need to re-generate these constants for the same molecule across multiple experiments.
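
For example, a previously saved config can be reloaded in a later experiment instead of being regenerated (a minimal sketch using the standard ``pickle`` module on the ``alkane.pkl`` file produced above)::

    import pickle

    # Reload the MolConfig that was saved with save_file='alkane'
    with open('alkane.pkl', 'rb') as f:
        mol_config = pickle.load(f)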

@@ -41,11 +41,11 @@ Custom Molecules
""""""""""""""""
If you have prepared your own molecule for conformer generation and it is not in an rdkit mol format, :mod:`conformer_rl` also has functions to create a :class:`~conformer_rl.config.mol_config.MolConfig` from other formats. For example, if your molecule can be expressed as a SMILES string, you can use the :func:`~conformer_rl.molecule_generation.generate_molecule_config.config_from_smiles` function, as in the following example::

- mol_config = config_from_smiles('CC(CCC)CC', calc_normalizers=True, save_file='alkane')
+ mol_config = config_from_smiles('CC(CCC)CC', num_conformers=200, calc_normalizers=True, save_file='alkane')

The molecule can also be in the form of a MOL file, in which case the function :func:`~conformer_rl.molecule_generation.generate_molecule_config.config_from_molFile` can be used::

- mol_config = config_from_molFile('name_of_mol_file.mol', calc_normalizers=True, save_file='alkane')
+ mol_config = config_from_molFile('name_of_mol_file.mol', num_conformers=200, calc_normalizers=True, save_file='alkane')

Configuring the Agent
^^^^^^^^^^^^^^^^^^^^^
@@ -60,20 +60,20 @@ Training Environment
""""""""""""""""""""
Next, we will set the training environment for the agent::

- config.train_env = Task('GibbsScorePruningEnv-v0', concurrency=True, num_envs=5, seed=np.random.randint(0,1e5), mol_config=mol_config, max_steps=200)
+ config.train_env = Task('GibbsScorePruningEnv-v0', concurrency=True, num_envs=5, seed=np.random.randint(0,1e5), mol_config=mol_config)

:func:`~conformer_rl.environments.environment_wrapper.Task` is a function that generates an environment wrapper compatible with the agent. Its main functionality is to generate multiple environments that the agent can interact with concurrently, which speeds up training if there are multiple CPU cores available.
The first parameter, ``'GibbsScorePruningEnv-v0'``, specifies the name of the environment implementation to be used. In this case it represents the class :class:`~conformer_rl.environments.environments.GibbsScorePruningEnv`, which has empirically produced good results for several organic molecules. To learn more about how environments are registered and how to create custom environments, see :ref:`Customizing Environment - Part One` and :ref:`Customizing Environment - Part Two`.

- We set ``concurrency=True`` to utilize multithreading across each of the parallel environments during training. The ``num_envs`` parameter specifies the number of environments to be run in parallel. Next we pass in the :class:`~conformer_rl.config.mol_config.MolConfig` object we created earlier by setting ``mol_config=mol_config`` to specify molecule specific parameters when initiating the environments. Finally, we set the ``max_steps`` parameter, which specifies the number of conformers to generate (i.e., the number of environment steps) before the end of an episode in the environment. This parameter should be set to the same number as the ``ep_steps`` parameter when generating the normalizing constants for the :class:`~conformer_rl.config.mol_config.MolConfig` object using :func:`~conformer_rl.molecule_generation.generate_molecule_config.config_from_rdkit`, as described in :ref:`Configuring the Environment`.
+ We set ``concurrency=True`` to utilize multithreading across each of the parallel environments during training. The ``num_envs`` parameter specifies the number of environments to be run in parallel. Next, we pass in the :class:`~conformer_rl.config.mol_config.MolConfig` object we created earlier by setting ``mol_config=mol_config`` to specify molecule-specific parameters when initiating the environments.

Evaluation Environment
""""""""""""""""""""""
Optionally, we can specify an evaluation environment, on which the agent is periodically evaluated throughout training. This is useful for a number of reasons. Firstly, if the evaluation environment differs from the training environment, we can use it to see how well the agent generalizes to other environments and to assess whether the agent is overfitting to the training task. Additionally, the training framework implemented in :mod:`conformer_rl` automatically saves the molecules generated by the evaluation environment when evaluating the model, and these can be used for analysis in downstream tasks.

For simplicity, we will use the same molecule config for the evaluation environment in this example. We specify the evaluation environment in a similar way as the training environment, except that we do not require parallel environments, so we use the default values for the ``concurrency`` and ``num_envs`` parameters::

- config.eval_env = Task('GibbsScorePruningEnv-v0', seed=np.random.randint(0,7e4), mol_config=mol_config, max_steps=200)
+ config.eval_env = Task('GibbsScorePruningEnv-v0', seed=np.random.randint(0,7e4), mol_config=mol_config)
config.eval_episodes=10000

``config.eval_episodes`` specifies the number of episodes the agent runs on the evaluation environment during each evaluation. If this is set to 0, the agent will not be evaluated on the evaluation environment.
13 changes: 8 additions & 5 deletions docs/source/tutorial/model_tuning.rst
@@ -13,17 +13,18 @@ As in :ref:`Getting Started - Training a Conformer Generation Agent`, we set up

# configure molecule
mol = generate_lignin(3)
- mol_config = config_from_rdkit(mol, calc_normalizers=True, save_file='lignin')
+ mol_config = config_from_rdkit(mol, num_conformers=200, calc_normalizers=True, save_file='lignin')

# create agent config and set environment
config = Config()
config.tag = 'example2'
- config.train_env = Task('GibbsScorePruningEnv-v0', concurrency=True, num_envs=20, mol_config=mol_config, max_steps=200)
+ config.train_env = Task('GibbsScorePruningEnv-v0', concurrency=True, num_envs=10, mol_config=mol_config)

Configuring the Neural Network
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
:mod:`conformer_rl` contains implementations of several graph neural network models, which can be found in :ref:`models`. One neural network architecture that has performed well empirically in conformer generation is :class:`~conformer_rl.models.RTGN.RTGN`, which we will use in this example::

+ # Neural Network
config.network = RTGN(6, 128, edge_dim=6, node_dim=5).to(device)
Notice that the observation from :class:`~conformer_rl.environments.environments.GibbsScorePruningEnv`
@@ -34,6 +35,7 @@ Configuring Logging
^^^^^^^^^^^^^^^^^^^
Next, we configure logging options::

+ # Logging Parameters
config.save_interval = 20000
config.data_dir = 'data'
config.use_tensorboard = True
@@ -45,9 +47,10 @@ Configuring the Evaluation Environment

Next, we can set up evaluation of the agent. In this example, the agent will be evaluated every 20000 steps, and we will set the evaluation environment to conformer generation for a lignin polymer with four monomers (instead of three). The evaluation environment will thus allow us to see whether the agent can generalize from three-monomer lignin to four-monomer lignin. We will also have the agent run 2 episodes during each evaluation::

+ # Set up evaluation
eval_mol = generate_lignin(4)
- eval_mol_config = config_from_rdkit(mol, calc_normalizers=True, ep_steps=200, save_file='lignin_eval')
- config.eval_env = Task('GibbsScorePruningEnv-v0', num_envs=1, mol_config=eval_mol_config, max_steps=200)
+ eval_mol_config = config_from_rdkit(eval_mol, num_conformers=200, calc_normalizers=True, save_file='lignin_eval')
+ config.eval_env = Task('GibbsScorePruningEnv-v0', num_envs=1, mol_config=eval_mol_config)
config.eval_interval = 20000
config.eval_episodes = 2

@@ -59,7 +62,7 @@ Finally, we can set the other hyperparameters. For more information on what each
config.rollout_length = 20
config.recurrence = 5
config.optimization_epochs = 4
- config.max_steps = 200000
+ config.max_steps = 80000
config.mini_batch_size = 50

# Training Hyperparameters
50 changes: 50 additions & 0 deletions examples/curriculum_example.py
@@ -0,0 +1,50 @@
import numpy as np
import torch
import pickle

from conformer_rl import utils
from conformer_rl.config import Config
from conformer_rl.environments import Task
from conformer_rl.models import RTGNRecurrent

from conformer_rl.molecule_generation.generate_alkanes import generate_branched_alkane
from conformer_rl.molecule_generation.generate_molecule_config import config_from_rdkit
from conformer_rl.agents import PPORecurrentExternalCurriculumAgent

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

import logging
logging.basicConfig(level=logging.DEBUG)


if __name__ == '__main__':
utils.set_one_thread()

# Create mol_configs for the curriculum
mol_configs = [config_from_rdkit(generate_branched_alkane(i), num_conformers=200, calc_normalizers=True) for i in range(8, 16)]
eval_mol_config = config_from_rdkit(generate_branched_alkane(16), num_conformers=200, calc_normalizers=True)

config = Config()
config.tag = 'curriculum_test'
config.network = RTGNRecurrent(6, 128, edge_dim=6, node_dim=5).to(device)

# Batch Hyperparameters
config.max_steps = 100000

# Training Hyperparameters
lr = 5e-6 * np.sqrt(10)
config.optimizer_fn = lambda params: torch.optim.Adam(params, lr=lr, eps=1e-5)

# Task Settings
config.train_env = Task('GibbsScorePruningCurriculumEnv-v0', concurrency=True, num_envs=10, seed=np.random.randint(0,1e5), mol_configs=mol_configs)
config.eval_env = Task('GibbsScorePruningEnv-v0', seed=np.random.randint(0,7e4), mol_config=eval_mol_config)
config.eval_interval = 20000

# Curriculum Hyperparameters
config.curriculum_agent_buffer_len = 20
config.curriculum_agent_reward_thresh = 0.4
config.curriculum_agent_success_rate = 0.7
config.curriculum_agent_fail_rate = 0.2

agent = PPORecurrentExternalCurriculumAgent(config)
agent.run_steps()
