update modularl version

zakaria-narjis · Aug 6, 2024 · 3aa5b9a
1 parent ffc2b23
commit 3aa5b9a
Show file tree

Hide file tree

Showing 3 changed files with 3 additions and 7 deletions.
diff --git a/modularl/agents/agent.py b/modularl/agents/agent.py
@@ -80,11 +80,11 @@ def act_train(self, batch_obs: torch.Tensor) -> torch.Tensor:
         """  # noqa: E501
 
     @abc.abstractmethod
-    def act_eval(self, obs: torch.Tensor) -> torch.Tensor:
+    def act_eval(self, batch_obs: torch.Tensor) -> torch.Tensor:
         """
         Select an action for evaluation.
 
-        :param obs: (torch.Tensor) Tensor containing the observation.
+        :param batch_obs: (torch.Tensor) Tensor containing a batch of observations.
         :return: (torch.Tensor) Selected action for evaluation.
         """  # noqa: E501
 

diff --git a/modularl/agents/sac.py b/modularl/agents/sac.py
@@ -162,8 +162,6 @@ def observe(
     def act_train(self, batch_obs: torch.Tensor) -> torch.Tensor:
         """
         Generate actions for training based on the current policy.
-
-        This method handles the exploration-exploitation trade-off during training.
         It uses a burning action function for initial exploration if specified,
         then switches to the learned policy.
 
@@ -174,8 +172,6 @@ def act_train(self, batch_obs: torch.Tensor) -> torch.Tensor:
             - If the global step is less than `learning_starts` and a burning action
               function is provided, it uses that function for exploration.
             - Otherwise, it uses the current policy (actor) to generate actions.
-            - The actions are detached from the computation graph to prevent
-              gradients from flowing back through the actor during certain updates.
         """  # noqa: E501
         if (
             self.global_step < self.learning_starts

diff --git a/setup.py b/setup.py
@@ -5,7 +5,7 @@
 
 setup(
     name="modularl",
-    version="0.1.1",
+    version="0.1.2",
     author="Zakaria Narjis",
     author_email="zakaria.narjis.97@gmail.com",
     description="A modular reinforcement learning library",