diff --git a/modularl/agents/agent.py b/modularl/agents/agent.py
index 71bcbd1..519bdca 100644
--- a/modularl/agents/agent.py
+++ b/modularl/agents/agent.py
@@ -80,11 +80,11 @@ def act_train(self, batch_obs: torch.Tensor) -> torch.Tensor:
         """  # noqa: E501
 
     @abc.abstractmethod
-    def act_eval(self, obs: torch.Tensor) -> torch.Tensor:
+    def act_eval(self, batch_obs: torch.Tensor) -> torch.Tensor:
         """
         Select an action for evaluation.
 
-        :param obs: (torch.Tensor) Tensor containing the observation.
+        :param batch_obs: (torch.Tensor) Tensor containing the observation.
 
         :return: (torch.Tensor) Selected action for evaluation.
         """  # noqa: E501
diff --git a/modularl/agents/sac.py b/modularl/agents/sac.py
index 203c477..0223bd1 100644
--- a/modularl/agents/sac.py
+++ b/modularl/agents/sac.py
@@ -162,8 +162,6 @@ def observe(
     def act_train(self, batch_obs: torch.Tensor) -> torch.Tensor:
         """
         Generate actions for training based on the current policy.
-
-        This method handles the exploration-exploitation trade-off during
         training. It uses a burning action function for initial exploration
         if specified, then switches to the learned policy.
 
@@ -174,8 +172,6 @@ def act_train(self, batch_obs: torch.Tensor) -> torch.Tensor:
         - If the global step is less than `learning_starts` and a burning
           action function is provided, it uses that function for exploration.
         - Otherwise, it uses the current policy (actor) to generate actions.
-        - The actions are detached from the computation graph to prevent
-          gradients from flowing back through the actor during certain updates.
         """  # noqa: E501
         if (
             self.global_step < self.learning_starts
diff --git a/setup.py b/setup.py
index 44315b7..d48f246 100644
--- a/setup.py
+++ b/setup.py
@@ -5,7 +5,7 @@ setup(
     name="modularl",
-    version="0.1.1",
+    version="0.1.2",
     author="Zakaria Narjis",
     author_email="zakaria.narjis.97@gmail.com",
     description="A modular reinforcement learning library",
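
The act_train docstring retained above describes the warm-up behaviour: before `learning_starts` global steps, a burning action function (if provided) drives exploration, and afterwards the learned actor selects actions. A minimal sketch of that schedule follows, using illustrative names (`select_training_action`, `policy`, `burning_action_func`) that are assumptions for this sketch, not modularl's actual attributes:

    import torch

    def select_training_action(
        batch_obs: torch.Tensor,
        global_step: int,
        learning_starts: int,
        policy,                    # callable mapping batch_obs -> actions
        burning_action_func=None,  # optional warm-up exploration callable
    ) -> torch.Tensor:
        # Warm-up phase: delegate to the burning action function if one exists.
        if global_step < learning_starts and burning_action_func is not None:
            return burning_action_func(batch_obs)
        # Otherwise sample from the learned policy; no gradients are needed
        # for plain action selection.
        with torch.no_grad():
            return policy(batch_obs)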