-
Notifications
You must be signed in to change notification settings - Fork 0
/
visualize_policy.py
57 lines (46 loc) · 1.94 KB
/
visualize_policy.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
from stable_baselines3 import SAC
from stable_baselines3.common.vec_env import SubprocVecEnv, VecVideoRecorder
from utils.train_utils import *
import sys
if __name__ == "__main__":
    # Record a rollout video of a trained SAC policy.
    #
    # Usage:
    #   <script>                    use the default log directory below
    #   <script> TASK SAMPLE_NUM    use ".../<TASK>/sample_<SAMPLE_NUM>"
    #
    # The video is written into the same log directory the model is loaded
    # from, with the file-name prefix "Curriculum".
    env_id = "Curriculum/FetchSlide"
    num_cpu = 24
    # Single source of truth for both the recorder length and the rollout loop.
    video_length = 500

    if len(sys.argv) > 1:
        # TASK and SAMPLE_NUM must be given together; exit with a usage
        # message instead of an IndexError when only one is supplied.
        if len(sys.argv) < 3:
            sys.exit("usage: " + sys.argv[0] + " [TASK SAMPLE_NUM]")
        task = sys.argv[1]
        sample_num = sys.argv[2]
        # NOTE(review): this path names an AntMaze run while env_id is
        # FetchSlide -- confirm that this is the intended log directory.
        log_dir = "./logs/AntMaze_UMaze_SAC_empty/" + task + "/sample_" + sample_num
    else:
        log_dir = "./logs/Fetch_Slide/curriculum_5/[Original task]/sample_2"
        task = None

    # One environment factory per worker; frames are rendered as RGB arrays
    # so that the video recorder can capture them.
    test_env = SubprocVecEnv(
        [make_env(env_id, i, render_mode="rgb_array") for i in range(num_cpu)]
    )
    try:
        model = SAC.load(log_dir + "/final_model.zip", env=test_env)
        # model = PPO.load(log_dir + "/final_model.zip", env=test_env)

        # Visualize the policy: the trigger fires only at step 0, so a single
        # clip of `video_length` steps is recorded.
        test_env = VecVideoRecorder(
            test_env,
            video_folder=log_dir,
            record_video_trigger=lambda x: x == 0,
            video_length=video_length,
            name_prefix="Curriculum",
        )

        # Take the observation from the reset of the *wrapped* env so the
        # first predicted action matches the state that is actually stepped.
        obs = test_env.reset()
        for _ in range(video_length):
            action, _states = model.predict(obs, deterministic=True)
            obs, rewards, dones, info = test_env.step(action)
    finally:
        # Always shut down the worker processes (and finalize the video if the
        # recorder wrapper was created), even when loading or rollout fails.
        test_env.close()
# test_env = SubprocVecEnv([make_env(env_id, i, render_mode="human") for i in range(num_cpu)])
# obs = test_env.reset()
# test_env.render("human")
# # time.sleep(5)
# # input("Press Enter to continue...")
# # images = [img]
# for i in range(50):
# # action = np.random.uniform(-1, 1, size=(num_cpu, 8))
# action, _states = model.predict(obs, deterministic=True)
# obs, rewards, dones, info = test_env.step(action)
# test_env.render("human")
# time.sleep(0.05)
# # images.append(img)
# # print(img)
# # print(rewards)
# # print(dones)
# # print(info)
# print(images)
# imageio.mimsave(log_dir + "/rollout.gif", [np.array(img) for i, img in enumerate(images)], fps=20)