forked from AI4Finance-Foundation/ElegantRL
-
Notifications
You must be signed in to change notification settings - Fork 0
/
demo_IsaacGym.py
81 lines (68 loc) · 2.08 KB
/
demo_IsaacGym.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import isaacgym
import torch
import sys
# import wandb
from elegantrl.train.run import train_and_evaluate
from elegantrl.train.config import Arguments, build_env
from elegantrl.agents.AgentPPO import AgentPPO
from elegantrl.envs.IsaacGym import IsaacVecEnv
def demo(task, gpu_id=0):
    """Train a PPO agent on an Isaac Gym vectorized environment with ElegantRL.

    Builds the vectorized Isaac Gym environment for the requested task,
    configures PPO hyperparameters, and runs the train/evaluate loop.

    :param task: environment name; supported values are ``'Ant'`` and
        ``'Humanoid'``.
    :param gpu_id: GPU index used for simulation, rollout, and learning
        (default 0, matching the previous hard-coded behavior).
    :raises ValueError: if ``task`` is not a supported environment name.
    """
    env_name = task
    agent_class = AgentPPO
    env_func = IsaacVecEnv

    if env_name == 'Ant':
        env_args = {
            'env_num': 2048,
            'env_name': env_name,
            'max_step': 1000,
            'state_dim': 60,
            'action_dim': 8,
            'if_discrete': False,
            'target_return': 6000.,
            # Previously hard-coded to 0; now follows the gpu_id parameter.
            'sim_device_id': gpu_id,
            'rl_device_id': gpu_id,
        }
        env = build_env(env_func=env_func, env_args=env_args)
        args = Arguments(agent_class, env=env)
        args.if_Isaac = True
        args.if_use_old_traj = True
        args.if_use_gae = True
        args.reward_scale = 2 ** -4
        args.horizon_len = 32
        args.batch_size = 16384  # minibatch size
        args.repeat_times = 5
        args.gamma = 0.99
        args.lambda_gae_adv = 0.95
        args.learning_rate = 0.0005
    elif env_name == 'Humanoid':
        env_args = {
            'env_num': 1024,
            'env_name': env_name,
            'max_step': 1000,
            'state_dim': 108,
            'action_dim': 21,
            'if_discrete': False,
            'target_return': 15000.,
            # BUG FIX: gpu_id was previously an undefined name here,
            # raising NameError for task='Humanoid'; it is now a parameter.
            'sim_device_id': gpu_id,
            'rl_device_id': gpu_id,
        }
        env = build_env(env_func=env_func, env_args=env_args)
        args = Arguments(agent_class, env=env)
        args.if_Isaac = True
        args.if_use_old_traj = True
        args.if_use_gae = True
        args.reward_scale = 0.01
        args.horizon_len = 32
        args.batch_size = 8192
        args.repeat_times = 5
        args.gamma = 0.99
        args.lambda_gae_adv = 0.95
        args.learning_rate = 0.0005
    else:
        # Fail fast with a clear message instead of the NameError on `args`
        # that an unrecognized task previously produced.
        raise ValueError(f"unsupported task {task!r}; expected 'Ant' or 'Humanoid'")

    args.eval_gap = 1e6
    args.target_step = 3e8
    args.learner_gpus = gpu_id
    args.random_seed = 0
    train_and_evaluate(args)
if __name__ == '__main__':
    # Entry point: train PPO on the 'Ant' Isaac Gym task.
    demo('Ant')