
Merge pull request #16 from hsvgbkhgbv/fix_policy_optim
Fix policy optim
mikezhang95 authored Nov 19, 2019
2 parents 2c1db06 + b87c411 commit ac7b796
Showing 28 changed files with 756 additions and 541 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -6,3 +6,4 @@
models/__pycache__/
utilities/__pycache__/
environments/__pycache__/
tensorboard/
+arguments.py
71 changes: 71 additions & 0 deletions args/simple_spread_coma_fc.py
@@ -0,0 +1,71 @@
from collections import namedtuple
from multiagent.environment import MultiAgentEnv
import multiagent.scenarios as scenario
from utilities.gym_wrapper import *
import numpy as np
from aux import *


'''define the model name'''
model_name = 'coma_fc'

'''define the scenario name'''
scenario_name = 'simple_spread'

'''define the special property'''
# independentArgs = namedtuple( 'independentArgs', [] )
aux_args = AuxArgs[model_name]()
alias = '_new_1'

'''load scenario from script'''
scenario = scenario.load(scenario_name+".py").Scenario()

'''create world'''
world = scenario.make_world()

'''create multiagent environment'''
env = MultiAgentEnv(world, scenario.reset_world, scenario.reward, scenario.observation, info_callback=None, shared_viewer=True)
env = GymWrapper(env)

MergeArgs = namedtuple('MergeArgs', Args._fields+AuxArgs[model_name]._fields)

# under the offline trainer, setting batch_size = replay_buffer_size = update_freq gives epoch-style updates
args = Args(model_name=model_name,
agent_num=env.get_num_of_agents(),
hid_size=32,
obs_size=np.max(env.get_shape_of_obs()),
continuous=False,
action_dim=np.max(env.get_output_shape_of_act()),
init_std=0.1,
policy_lrate=1e-2,
value_lrate=1e-4,
max_steps=200,
batch_size=100,
gamma=0.9,
normalize_advantages=False,
entr=1e-2,
entr_inc=0.0,
action_num=np.max(env.get_input_shape_of_act()),
q_func=True,
train_episodes_num=int(5e3),
replay=True,
replay_buffer_size=1e4,
replay_warmup=0,
cuda=True,
grad_clip=True,
save_model_freq=10,
target=True,
target_lr=1e-1,
behaviour_update_freq=100,
critic_update_times=10,
target_update_freq=200,
gumbel_softmax=False,
epsilon_softmax=False,
online=True,
reward_record_type='episode_mean_step',
shared_parameters=True
)

args = MergeArgs(*(args+aux_args))

log_name = scenario_name + '_' + model_name + alias
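The MergeArgs lines above lean on two namedtuple behaviors: _fields tuples can be concatenated, and adding two namedtuples yields a plain tuple of their values. A minimal, self-contained sketch of the same pattern (field names here are hypothetical, not taken from aux.py):

from collections import namedtuple

Args = namedtuple('Args', ['model_name', 'hid_size'])
ComaArgs = namedtuple('ComaArgs', ['softmax_eps'])      # hypothetical aux field

base = Args(model_name='coma_fc', hid_size=32)
aux = ComaArgs(softmax_eps=0.1)

# Adding two namedtuples produces a plain tuple of their values, so a merged
# namedtuple over the combined field names can be rebuilt from that tuple.
MergeArgs = namedtuple('MergeArgs', Args._fields + ComaArgs._fields)
merged = MergeArgs(*(base + aux))

print(merged.model_name, merged.hid_size, merged.softmax_eps)  # coma_fc 32 0.1

Rebuilding one flat namedtuple this way keeps the model-specific options addressable through the same args object as the shared hyperparameters.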
14 changes: 7 additions & 7 deletions args/simple_spread_independent_ac.py
@@ -16,7 +16,7 @@
'''define the special property'''
# independentArgs = namedtuple( 'independentArgs', [] )
aux_args = AuxArgs[model_name]()
-alias = ''
+alias = '_new_1'

'''load scenario from script'''
scenario = scenario.load(scenario_name+".py").Scenario()
@@ -38,29 +38,29 @@
continuous=False,
action_dim=np.max(env.get_output_shape_of_act()),
init_std=0.1,
-policy_lrate=1e-3,
-value_lrate=1e-2,
+policy_lrate=1e-6,
+value_lrate=1e-5,
max_steps=200,
-batch_size=32,
+batch_size=100,
gamma=0.9,
normalize_advantages=False,
entr=1e-2,
entr_inc=0.0,
action_num=np.max(env.get_input_shape_of_act()),
q_func=True,
-train_episodes_num=int(2e3),
+train_episodes_num=int(5e3),
replay=True,
replay_buffer_size=1e4,
replay_warmup=0,
cuda=True,
grad_clip=True,
-save_model_freq=100,
+save_model_freq=10,
target=True,
target_lr=1e-1,
behaviour_update_freq=100,
critic_update_times=10,
target_update_freq=200,
-gumbel_softmax=True,
+gumbel_softmax=False,
epsilon_softmax=False,
online=True,
reward_record_type='episode_mean_step',
10 changes: 5 additions & 5 deletions args/simple_spread_independent_ddpg.py
@@ -16,7 +16,7 @@
'''define the special property'''
# independentArgs = namedtuple( 'independentArgs', [] )
aux_args = AuxArgs[model_name]()
-alias = ''
+alias = '_new_6'

'''load scenario from script'''
scenario = scenario.load(scenario_name+".py").Scenario()
@@ -47,14 +47,14 @@
entr=1e-2,
entr_inc=0.0,
action_num=np.max(env.get_input_shape_of_act()),
-q_func=True,
-train_episodes_num=int(1e4),
+q_func=False,
+train_episodes_num=int(5e3),
replay=True,
-replay_buffer_size=1e6,
+replay_buffer_size=1e4,
replay_warmup=0,
cuda=True,
grad_clip=True,
-save_model_freq=100,
+save_model_freq=10,
target=True,
target_lr=1e-1,
behaviour_update_freq=100,
8 changes: 4 additions & 4 deletions args/simple_spread_maddpg.py
@@ -16,7 +16,7 @@
'''define the special property'''
# maddpgArgs = namedtuple( 'maddpgArgs', [] )
aux_args = AuxArgs[model_name]()
-alias = ''
+alias = '_new_3'

'''load scenario from script'''
scenario = scenario.load(scenario_name+".py").Scenario()
@@ -38,8 +38,8 @@
continuous=False,
action_dim=np.max(env.get_output_shape_of_act()),
init_std=0.1,
-policy_lrate=1e-3,
-value_lrate=1e-2,
+policy_lrate=1e-4,
+value_lrate=1e-3,
max_steps=200,
batch_size=32,
gamma=0.9,
@@ -48,7 +48,7 @@
entr_inc=0.0,
action_num=np.max(env.get_input_shape_of_act()),
q_func=True,
-train_episodes_num=int(2e3),
+train_episodes_num=int(5e3),
replay=True,
replay_buffer_size=1e4,
replay_warmup=0,
10 changes: 5 additions & 5 deletions args/simple_spread_sqddpg.py
@@ -15,8 +15,8 @@

'''define the special property'''
# sqddpgArgs = namedtuple( 'sqddpgArgs', ['sample_size'] )
-aux_args = AuxArgs[model_name](1)
-alias = ''
+aux_args = AuxArgs[model_name](5)
+alias = '_new_sample_12'

'''load scenario from script'''
scenario = scenario.load(scenario_name+".py").Scenario()
@@ -38,8 +38,8 @@
continuous=False,
action_dim=np.max(env.get_output_shape_of_act()),
init_std=0.1,
-policy_lrate=1e-3,
-value_lrate=1e-2,
+policy_lrate=1e-4,
+value_lrate=1e-3,
max_steps=200,
batch_size=32,
gamma=0.9,
@@ -48,7 +48,7 @@
entr_inc=0.0,
action_num=np.max(env.get_input_shape_of_act()),
q_func=True,
-train_episodes_num=int(2e3),
+train_episodes_num=int(5e3),
replay=True,
replay_buffer_size=1e4,
replay_warmup=0,
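The substantive change in this file is sample_size going from 1 to 5, alongside the usual learning-rate and episode-count retuning. If, as the commented sqddpgArgs field name suggests, sample_size controls how many random agent orderings are drawn for a Shapley-style credit estimate, a generic Monte-Carlo sketch of why more samples tighten that estimate could look like this (not the repository's implementation; coalition_value is a stand-in for the learned critic):

import random

def shapley_estimate(agents, coalition_value, sample_size=5):
    # Average each agent's marginal contribution over random orderings.
    contrib = {a: 0.0 for a in agents}
    for _ in range(sample_size):
        order = random.sample(agents, len(agents))   # random permutation of agents
        coalition, prev = [], coalition_value([])
        for a in order:
            coalition.append(a)
            cur = coalition_value(coalition)
            contrib[a] += cur - prev                 # marginal contribution of a
            prev = cur
    return {a: v / sample_size for a, v in contrib.items()}

# toy usage with a made-up coalition value function
print(shapley_estimate([0, 1, 2], lambda c: len(c) ** 2, sample_size=5))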
71 changes: 71 additions & 0 deletions args/simple_tag_coma_fc.py
@@ -0,0 +1,71 @@
from collections import namedtuple
from multiagent.environment import MultiAgentEnv
import multiagent.scenarios as scenario
from utilities.gym_wrapper import *
import numpy as np
from aux import *


'''define the model name'''
model_name = 'coma_fc'

'''define the scenario name'''
scenario_name = 'simple_tag'

'''define the special property'''
# independentArgs = namedtuple( 'independentArgs', [] )
aux_args = AuxArgs[model_name]()
alias = ''

'''load scenario from script'''
scenario = scenario.load(scenario_name+".py").Scenario()

'''create world'''
world = scenario.make_world()

'''create multiagent environment'''
env = MultiAgentEnv(world, scenario.reset_world, scenario.reward, scenario.observation, info_callback=None, shared_viewer=True,done_callback=scenario.episode_over)
env = GymWrapper(env)

MergeArgs = namedtuple('MergeArgs', Args._fields+AuxArgs[model_name]._fields)

# under the offline trainer, setting batch_size = replay_buffer_size = update_freq gives epoch-style updates
args = Args(model_name=model_name,
agent_num=env.get_num_of_agents(),
hid_size=128,
obs_size=np.max(env.get_shape_of_obs()),
continuous=False,
action_dim=np.max(env.get_output_shape_of_act()),
init_std=0.1,
policy_lrate=1e-3,
value_lrate=1e-4,
max_steps=200,
batch_size=100,
gamma=0.99,
normalize_advantages=False,
entr=1e-3,
entr_inc=0.0,
action_num=np.max(env.get_input_shape_of_act()),
q_func=True,
train_episodes_num=int(5e3),
replay=True,
replay_buffer_size=1e4,
replay_warmup=0,
cuda=True,
grad_clip=True,
save_model_freq=10,
target=True,
target_lr=1e-1,
behaviour_update_freq=100,
critic_update_times=10,
target_update_freq=200,
gumbel_softmax=False,
epsilon_softmax=False,
online=True,
reward_record_type='episode_mean_step',
shared_parameters=False
)

args = MergeArgs(*(args+aux_args))

log_name = scenario_name + '_' + model_name + alias
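Both new files configure the 'coma_fc' model. For reference only, here is a hedged numpy sketch of the standard COMA counterfactual advantage that such a model is conventionally trained with (Foerster et al., 2018); the names and shapes are illustrative assumptions, not this repository's API:

import numpy as np

def coma_advantage(q_values, pi, action):
    # q_values: (action_num,) critic outputs for one agent, other agents' actions held fixed.
    # pi: (action_num,) that agent's current policy probabilities.
    # action: int, the action actually taken.
    baseline = np.dot(pi, q_values)        # counterfactual baseline: E_{u'~pi}[Q(s, u', u_-a)]
    return q_values[action] - baseline     # A(s, u) = Q(s, u) - baseline

# toy usage
q = np.array([0.2, 1.0, -0.5])
pi = np.array([0.2, 0.5, 0.3])
print(coma_advantage(q, pi, action=1))     # 1.0 - 0.39 = 0.61

The baseline marginalizes the agent's own action under its current policy while keeping the other agents' actions fixed, which is what turns the joint Q-value into a per-agent credit signal.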
11 changes: 5 additions & 6 deletions args/simple_tag_independent_ac.py
@@ -6,7 +6,6 @@
from aux import *



'''define the model name'''
model_name = 'independent_ac'

@@ -38,17 +37,17 @@
continuous=False,
action_dim=np.max(env.get_output_shape_of_act()),
init_std=0.1,
-policy_lrate=1e-4,
-value_lrate=1e-3,
+policy_lrate=1e-3,
+value_lrate=1e-4,
max_steps=200,
-batch_size=128,
+batch_size=100,
gamma=0.99,
normalize_advantages=False,
entr=1e-3,
entr_inc=0.0,
action_num=np.max(env.get_input_shape_of_act()),
q_func=True,
-train_episodes_num=int(4e3),
+train_episodes_num=int(5e3),
replay=True,
replay_buffer_size=1e4,
replay_warmup=0,
@@ -60,7 +59,7 @@
behaviour_update_freq=100,
critic_update_times=10,
target_update_freq=200,
-gumbel_softmax=True,
+gumbel_softmax=False,
epsilon_softmax=False,
online=True,
reward_record_type='episode_mean_step',
6 changes: 3 additions & 3 deletions args/simple_tag_independent_ddpg.py
@@ -39,16 +39,16 @@
action_dim=np.max(env.get_output_shape_of_act()),
init_std=0.1,
policy_lrate=1e-4,
-value_lrate=1e-3,
+value_lrate=5e-4,
max_steps=200,
batch_size=128,
gamma=0.99,
normalize_advantages=False,
entr=1e-3,
entr_inc=0.0,
action_num=np.max(env.get_input_shape_of_act()),
-q_func=True,
-train_episodes_num=int(4e3),
+q_func=False,
+train_episodes_num=int(5e3),
replay=True,
replay_buffer_size=1e4,
replay_warmup=0,
4 changes: 2 additions & 2 deletions args/simple_tag_maddpg.py
@@ -39,7 +39,7 @@
action_dim=np.max(env.get_output_shape_of_act()),
init_std=0.1,
policy_lrate=1e-4,
-value_lrate=1e-3,
+value_lrate=5e-4,
max_steps=200,
batch_size=128,
gamma=0.99,
@@ -48,7 +48,7 @@
entr_inc=0.0,
action_num=np.max(env.get_input_shape_of_act()),
q_func=True,
-train_episodes_num=int(4e3),
+train_episodes_num=int(5e3),
replay=True,
replay_buffer_size=1e4,
replay_warmup=0,