From 1bfaaecc0080d67978e7ebe5abc4db3454a120f6 Mon Sep 17 00:00:00 2001 From: Matteo Bettini Date: Wed, 24 Apr 2024 11:14:55 +0100 Subject: [PATCH 1/6] amend --- fine_tuned/meltingpot/conf/config.yaml | 71 +++++++++++++++++++++++++ fine_tuned/meltingpot/meltingpot_run.py | 31 +++++++++++ 2 files changed, 102 insertions(+) create mode 100644 fine_tuned/meltingpot/conf/config.yaml create mode 100644 fine_tuned/meltingpot/meltingpot_run.py diff --git a/fine_tuned/meltingpot/conf/config.yaml b/fine_tuned/meltingpot/conf/config.yaml new file mode 100644 index 00000000..6e580247 --- /dev/null +++ b/fine_tuned/meltingpot/conf/config.yaml @@ -0,0 +1,71 @@ +defaults: + - experiment: base_experiment + - algorithm: ippo + - task: meltingpot/commons_harvest__open + - model: layers/cnn + - model@critic_model: layers/cnn + - _self_ + +hydra: + searchpath: + # Tells hydra to add the default benchmarl configuration to its path + - pkg://benchmarl/conf + +seed: 0 + +task: + max_steps: 1000 + +model: + mlp_num_cells: [ 256, 256 ] + + cnn_num_cells: [ 16, 32, 256 ] + cnn_kernel_sizes: [ 8, 4, 11 ] + cnn_strides: [4, 2, 1] + cnn_paddings: [2, 1, 5] + cnn_activation_class: torch.nn.ReLU + +critic_model: + mlp_num_cells: [ 256, 256 ] + + cnn_num_cells: [ 16, 32, 256 ] + cnn_kernel_sizes: [ 8, 4, 11 ] + cnn_strides: [ 4, 2, 1 ] + cnn_paddings: [ 2, 1, 5 ] + cnn_activation_class: torch.nn.ReLU + +algorithm: + entropy_coef: 0.001 + use_tanh_normal: True + +experiment: + sampling_device: "cpu" + train_device: "cuda" + + share_policy_params: True + gamma: 0.99 + + adam_eps: 0.000001 + lr: 0.00025 + clip_grad_norm: True + clip_grad_val: 5 + + max_n_iters: null + max_n_frames: 10_000_000 + + on_policy_collected_frames_per_batch: 1000 + on_policy_n_envs_per_worker: 1 + on_policy_n_minibatch_iters: 45 + on_policy_minibatch_size: 100 + + evaluation: True + render: True + evaluation_interval: 1000 + evaluation_episodes: 1 + + loggers: [wandb] + create_json: False + + save_folder: null + restore_file: null + checkpoint_interval: 0 diff --git a/fine_tuned/meltingpot/meltingpot_run.py b/fine_tuned/meltingpot/meltingpot_run.py new file mode 100644 index 00000000..703d38e1 --- /dev/null +++ b/fine_tuned/meltingpot/meltingpot_run.py @@ -0,0 +1,31 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. +# + +import hydra + +from benchmarl.experiment import Experiment + +from benchmarl.hydra_config import load_experiment_from_hydra +from hydra.core.hydra_config import HydraConfig +from omegaconf import DictConfig, OmegaConf + + +@hydra.main(version_base=None, config_path="conf", config_name="config") +def hydra_experiment(cfg: DictConfig) -> None: + hydra_choices = HydraConfig.get().runtime.choices + task_name = hydra_choices.task + algorithm_name = hydra_choices.algorithm + + print(f"\nAlgorithm: {algorithm_name}, Task: {task_name}") + print("\nLoaded config:\n") + print(OmegaConf.to_yaml(cfg)) + + experiment: Experiment = load_experiment_from_hydra(cfg, task_name=task_name) + experiment.run() + + +if __name__ == "__main__": + hydra_experiment() From 9b9dea8fc383129ca9c6b3454906b3e778599b7f Mon Sep 17 00:00:00 2001 From: Matteo Bettini Date: Wed, 24 Apr 2024 11:21:10 +0100 Subject: [PATCH 2/6] amend --- benchmarl/environments/meltingpot/common.py | 1 + 1 file changed, 1 insertion(+) diff --git a/benchmarl/environments/meltingpot/common.py b/benchmarl/environments/meltingpot/common.py index f209d8b1..4e12e79f 100644 --- a/benchmarl/environments/meltingpot/common.py +++ b/benchmarl/environments/meltingpot/common.py @@ -81,6 +81,7 @@ def get_env_fun( return lambda: MeltingpotEnv( substrate=self.name.lower(), categorical_actions=True, + device=device, **self.config, ) From 6be6843011ab08bd78b634229baf7b2fc9be2287 Mon Sep 17 00:00:00 2001 From: Matteo Bettini Date: Wed, 24 Apr 2024 14:04:30 +0100 Subject: [PATCH 3/6] amend --- fine_tuned/meltingpot/conf/config.yaml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fine_tuned/meltingpot/conf/config.yaml b/fine_tuned/meltingpot/conf/config.yaml index 6e580247..696cfd84 100644 --- a/fine_tuned/meltingpot/conf/config.yaml +++ b/fine_tuned/meltingpot/conf/config.yaml @@ -14,7 +14,7 @@ hydra: seed: 0 task: - max_steps: 1000 + max_steps: 200 model: mlp_num_cells: [ 256, 256 ] @@ -61,7 +61,8 @@ experiment: evaluation: True render: True evaluation_interval: 1000 - evaluation_episodes: 1 + evaluation_episodes: 5 + evaluation_deterministic_actions: False loggers: [wandb] create_json: False From d04b755f817dcf96793092749badc0c43099456b Mon Sep 17 00:00:00 2001 From: Matteo Bettini Date: Wed, 24 Apr 2024 16:33:49 +0100 Subject: [PATCH 4/6] amend --- fine_tuned/meltingpot/conf/config.yaml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fine_tuned/meltingpot/conf/config.yaml b/fine_tuned/meltingpot/conf/config.yaml index 696cfd84..0dcfbfcc 100644 --- a/fine_tuned/meltingpot/conf/config.yaml +++ b/fine_tuned/meltingpot/conf/config.yaml @@ -14,7 +14,8 @@ hydra: seed: 0 task: - max_steps: 200 + max_steps: 1400 + disable_shooting: True model: mlp_num_cells: [ 256, 256 ] @@ -46,7 +47,7 @@ experiment: gamma: 0.99 adam_eps: 0.000001 - lr: 0.00025 + lr: 0.00005 clip_grad_norm: True clip_grad_val: 5 @@ -61,7 +62,7 @@ experiment: evaluation: True render: True evaluation_interval: 1000 - evaluation_episodes: 5 + evaluation_episodes: 1 evaluation_deterministic_actions: False loggers: [wandb] From 8b0279ad9aabd9e196743036bb9838601a58702e Mon Sep 17 00:00:00 2001 From: Matteo Bettini Date: Sun, 28 Apr 2024 10:35:38 +0100 Subject: [PATCH 5/6] amend --- .../meltingpot/conf/config_more_perf.yaml | 72 +++++++++++++++++++ fine_tuned/meltingpot/meltingpot_run.py | 2 +- 2 files changed, 73 insertions(+), 1 deletion(-) create mode 100644 fine_tuned/meltingpot/conf/config_more_perf.yaml diff --git a/fine_tuned/meltingpot/conf/config_more_perf.yaml b/fine_tuned/meltingpot/conf/config_more_perf.yaml new file mode 100644 index 00000000..d4665c3f --- /dev/null +++ b/fine_tuned/meltingpot/conf/config_more_perf.yaml @@ -0,0 +1,72 @@ +defaults: + - experiment: base_experiment + - algorithm: ippo + - task: meltingpot/commons_harvest__open + - model: layers/cnn + - model@critic_model: layers/cnn + - _self_ + +hydra: + searchpath: + # Tells hydra to add the default benchmarl configuration to its path + - pkg://benchmarl/conf + +seed: 0 + +task: + disable_shooting: True + +model: + mlp_num_cells: [ 256, 256 ] + + cnn_num_cells: [ 16, 32, 256 ] + cnn_kernel_sizes: [ 8, 4, 11 ] + cnn_strides: [4, 2, 1] + cnn_paddings: [2, 1, 5] + cnn_activation_class: torch.nn.ReLU + +critic_model: + mlp_num_cells: [ 256, 256 ] + + cnn_num_cells: [ 16, 32, 256 ] + cnn_kernel_sizes: [ 8, 4, 11 ] + cnn_strides: [ 4, 2, 1 ] + cnn_paddings: [ 2, 1, 5 ] + cnn_activation_class: torch.nn.ReLU + +algorithm: + entropy_coef: 0.001 + use_tanh_normal: True + +experiment: + sampling_device: "cpu" + train_device: "cuda" + + share_policy_params: True + gamma: 0.99 + + adam_eps: 0.000001 + lr: 0.00025 + clip_grad_norm: True + clip_grad_val: 5 + + max_n_iters: null + max_n_frames: 10_000_000 + + on_policy_collected_frames_per_batch: 2000 + on_policy_n_envs_per_worker: 1 + on_policy_n_minibatch_iters: 45 + on_policy_minibatch_size: 200 + + evaluation: True + render: True + evaluation_interval: 2000 + evaluation_episodes: 1 + evaluation_deterministic_actions: False + + loggers: [wandb] + create_json: False + + save_folder: null + restore_file: null + checkpoint_interval: 0 diff --git a/fine_tuned/meltingpot/meltingpot_run.py b/fine_tuned/meltingpot/meltingpot_run.py index 703d38e1..b1f581f9 100644 --- a/fine_tuned/meltingpot/meltingpot_run.py +++ b/fine_tuned/meltingpot/meltingpot_run.py @@ -13,7 +13,7 @@ from omegaconf import DictConfig, OmegaConf -@hydra.main(version_base=None, config_path="conf", config_name="config") +@hydra.main(version_base=None, config_path="conf", config_name="config_more_perf") def hydra_experiment(cfg: DictConfig) -> None: hydra_choices = HydraConfig.get().runtime.choices task_name = hydra_choices.task From 668c526c8c0c0d515113950ac706d8f103535aed Mon Sep 17 00:00:00 2001 From: Matteo Bettini Date: Fri, 3 May 2024 15:58:39 +0100 Subject: [PATCH 6/6] amend --- fine_tuned/meltingpot/conf/config.yaml | 5 +- .../meltingpot/conf/config_more_perf.yaml | 72 ------------------- fine_tuned/meltingpot/meltingpot_run.py | 2 +- 3 files changed, 3 insertions(+), 76 deletions(-) delete mode 100644 fine_tuned/meltingpot/conf/config_more_perf.yaml diff --git a/fine_tuned/meltingpot/conf/config.yaml b/fine_tuned/meltingpot/conf/config.yaml index 0dcfbfcc..4f57d154 100644 --- a/fine_tuned/meltingpot/conf/config.yaml +++ b/fine_tuned/meltingpot/conf/config.yaml @@ -14,8 +14,7 @@ hydra: seed: 0 task: - max_steps: 1400 - disable_shooting: True + max_steps: 200 model: mlp_num_cells: [ 256, 256 ] @@ -47,7 +46,7 @@ experiment: gamma: 0.99 adam_eps: 0.000001 - lr: 0.00005 + lr: 0.00025 clip_grad_norm: True clip_grad_val: 5 diff --git a/fine_tuned/meltingpot/conf/config_more_perf.yaml b/fine_tuned/meltingpot/conf/config_more_perf.yaml deleted file mode 100644 index d4665c3f..00000000 --- a/fine_tuned/meltingpot/conf/config_more_perf.yaml +++ /dev/null @@ -1,72 +0,0 @@ -defaults: - - experiment: base_experiment - - algorithm: ippo - - task: meltingpot/commons_harvest__open - - model: layers/cnn - - model@critic_model: layers/cnn - - _self_ - -hydra: - searchpath: - # Tells hydra to add the default benchmarl configuration to its path - - pkg://benchmarl/conf - -seed: 0 - -task: - disable_shooting: True - -model: - mlp_num_cells: [ 256, 256 ] - - cnn_num_cells: [ 16, 32, 256 ] - cnn_kernel_sizes: [ 8, 4, 11 ] - cnn_strides: [4, 2, 1] - cnn_paddings: [2, 1, 5] - cnn_activation_class: torch.nn.ReLU - -critic_model: - mlp_num_cells: [ 256, 256 ] - - cnn_num_cells: [ 16, 32, 256 ] - cnn_kernel_sizes: [ 8, 4, 11 ] - cnn_strides: [ 4, 2, 1 ] - cnn_paddings: [ 2, 1, 5 ] - cnn_activation_class: torch.nn.ReLU - -algorithm: - entropy_coef: 0.001 - use_tanh_normal: True - -experiment: - sampling_device: "cpu" - train_device: "cuda" - - share_policy_params: True - gamma: 0.99 - - adam_eps: 0.000001 - lr: 0.00025 - clip_grad_norm: True - clip_grad_val: 5 - - max_n_iters: null - max_n_frames: 10_000_000 - - on_policy_collected_frames_per_batch: 2000 - on_policy_n_envs_per_worker: 1 - on_policy_n_minibatch_iters: 45 - on_policy_minibatch_size: 200 - - evaluation: True - render: True - evaluation_interval: 2000 - evaluation_episodes: 1 - evaluation_deterministic_actions: False - - loggers: [wandb] - create_json: False - - save_folder: null - restore_file: null - checkpoint_interval: 0 diff --git a/fine_tuned/meltingpot/meltingpot_run.py b/fine_tuned/meltingpot/meltingpot_run.py index b1f581f9..703d38e1 100644 --- a/fine_tuned/meltingpot/meltingpot_run.py +++ b/fine_tuned/meltingpot/meltingpot_run.py @@ -13,7 +13,7 @@ from omegaconf import DictConfig, OmegaConf -@hydra.main(version_base=None, config_path="conf", config_name="config_more_perf") +@hydra.main(version_base=None, config_path="conf", config_name="config") def hydra_experiment(cfg: DictConfig) -> None: hydra_choices = HydraConfig.get().runtime.choices task_name = hydra_choices.task