# Rollout_policies.py
# Forked from brsynth/RetroPathRL.
"""
Defines the rollout policies.
Usage: move = rollout_policy.select_best_move(available_moves),
where rollout_policy is an instance of one of the policy classes defined below.
Remarks:
- various policies were tested on toy examples in a Jupyter notebook during implementation
"""
from math import sqrt, log
import random
class Rollout_policy(object):
    """
    Base class shared by every rollout policy.

    A rollout policy receives a list of moves and picks the one to use for
    rollout. This class only stores descriptive metadata and exposes
    ``select_best_move``; it never assigns ``self.policy`` itself, so that
    callable has to be set on the instance before ``select_best_move`` is used.
    """

    def __init__(self, policy_type, description = "Default Rollout Policy"):
        """Record the policy type and its human-readable description."""
        self.description = description
        self.policy_type = policy_type

    def select_best_move(self, available_moves):
        """
        Run ``self.policy`` on ``available_moves`` and return what it selects.

        An IndexError raised by the policy is translated into a ``None`` result.
        """
        try:
            chosen = self.policy(available_moves)
        except IndexError:
            chosen = None
        return chosen

    def __str__(self):
        """Return a two-line summary: policy type, then description."""
        template = "Policy type: {} \nDescription: {}"
        return template.format(self.policy_type, self.description)
class Rollout_policy_first(Rollout_policy):
    """
    Rollout policy that relies purely on the order of the moves.

    The move stored at index 0 of the list is always the one returned.
    """

    def __init__(self):
        super().__init__(
            policy_type = "First found combination",
            description = "Always select the first compound_rule combination",
        )
        self.name = "Rollout_policy_first"
        # Rebind 'policy' on the instance to the selection function built by policy().
        self.policy = self.policy()

    def policy(self):
        """Build and return the function that picks the first available move."""
        def take_first(available_moves):
            # Raises IndexError when the list is empty.
            return available_moves[0]
        return take_first
class Rollout_policy_chemical_best(Rollout_policy):
    """
    Greedy rollout policy driven by the chemical score.

    Returns the move whose ``chemical_score`` is the highest; when several
    moves share the highest score, the earliest one in the list is returned.
    """

    def __init__(self):
        super().__init__(
            policy_type = "Best Chemical",
            description = "Always select the move with the highest chemical score",
        )
        self.name = "Rollout_policy_chemical_best"
        self.policy = self.best_chemical_policy()

    def best_chemical_policy(self):
        """Build and return the function selecting the highest chemical score."""
        def pick_highest_chemical(available_moves):
            # Index access first: an empty list must raise IndexError here,
            # whereas max() alone would raise ValueError.
            _ = available_moves[0]
            # max() keeps the first maximal item, matching a strict '>' scan.
            return max(available_moves, key=lambda move: move.chemical_score)
        return pick_highest_chemical
class Rollout_policy_biological_best(Rollout_policy):
    """
    Greedy rollout policy driven by the biological score.

    Returns the move whose ``biological_score`` is the highest; when several
    moves share the highest score, the earliest one in the list is returned.
    """

    def __init__(self):
        description = "Always select the move with the highest biological score"
        super().__init__(policy_type = "Best Biological", description = description)
        self.policy = self.best_biological_policy()
        self.name = "Rollout_policy_biological_best"

    def best_biological_policy(self):
        """Build and return the function selecting the highest biological score."""
        def select_best_inside(available_moves):
            """
            Return the move with the highest biological score.

            Raises IndexError when ``available_moves`` is empty.
            """
            current_best = available_moves[0]
            current_best_score = current_best.biological_score
            for element in available_moves:
                # Read only the candidate's score; the running best must change
                # solely when a candidate strictly beats it.
                biological_score = element.biological_score
                if biological_score > current_best_score:
                    current_best_score = biological_score
                    current_best = element
            return current_best
        return select_best_inside
class Rollout_policy_biochemical_addition_best(Rollout_policy):
    """
    Greedy rollout policy driven by the sum of both scores.

    Returns the move maximising ``biological_score + chemical_score``; on a
    tie the earliest move in the list is kept.
    """

    def __init__(self):
        super().__init__(
            policy_type = "Best Biochemical addition",
            description = "Select the highest Biochemical addition score",
        )
        self.name = "Rollout_policy_biochemical_addition_best"
        self.policy = self.best_biochemical_policy()

    def best_biochemical_policy(self):
        """Build and return the function selecting the highest summed score."""
        def pick_highest_sum(available_moves):
            # Raises IndexError when the list is empty.
            leader = available_moves[0]
            top_total = leader.biological_score + leader.chemical_score
            for candidate in available_moves:
                total = candidate.biological_score + candidate.chemical_score
                if total > top_total:
                    leader, top_total = candidate, total
            return leader
        return pick_highest_sum
class Rollout_policy_biochemical_multiplication_best(Rollout_policy):
    """
    Greedy rollout policy driven by the product of both scores.

    Returns the move maximising ``biological_score * chemical_score``; on a
    tie the earliest move in the list is kept.
    """

    def __init__(self):
        super().__init__(
            policy_type = "Best Biochemical multiplication",
            description = "Select the highest Biochemical multiplication score",
        )
        self.name = "Rollout_policy_biochemical_multiplication_best"
        self.policy = self.best_biochemical_policy()

    def best_biochemical_policy(self):
        """Build and return the function selecting the highest multiplied score."""
        def product_score(move):
            return move.biological_score * move.chemical_score

        def pick_highest_product(available_moves):
            # Raises IndexError when the list is empty.
            best_move = available_moves[0]
            best_value = product_score(best_move)
            for move in available_moves:
                value = product_score(move)
                if value > best_value:
                    best_move = move
                    best_value = value
            return best_move
        return pick_highest_product
class Rollout_policy_random_uniform(Rollout_policy):
    """
    Rollout policy drawing a move uniformly at random.

    No score is consulted: every available move is equally likely.
    """

    def __init__(self):
        description = "Random selection - no scoring involved"
        super().__init__(policy_type = "Random sampling", description = description)
        self.policy = self.policy()
        self.name = "Rollout_policy_random_uniform"

    def policy(self):
        """Build and return the uniform sampling function."""
        def select_best_inside(available_moves):
            """
            Return one move drawn uniformly from ``available_moves``.

            Raises IndexError when ``available_moves`` is empty.
            """
            # random.choice raises IndexError on an empty sequence, which
            # select_best_move turns into None. randrange(0, 0) would raise
            # ValueError instead and escape that handler.
            return random.choice(available_moves)
        return select_best_inside
class Rollout_policy_random_uniform_on_chem_score(Rollout_policy):
    """
    Rollout policy sampling a move at random, weighted by chemical score.

    Each move is drawn with a probability proportional to its
    ``chemical_score``. When the scores sum to zero there is nothing to
    weight by, so the draw falls back to a uniform one.
    """

    def __init__(self):
        description = "Random selection - uniform sampling from chemical weights"
        super().__init__(policy_type = "Chemical uniform sampling", description = description)
        self.policy = self.policy()
        self.name = "Rollout_policy_random_uniform_on_chem_score"

    def policy(self):
        """Build and return the chemical-score weighted sampling function."""
        def select_best_inside(available_moves):
            """
            Return one move sampled with chemical-score weights.

            Raises IndexError when ``available_moves`` is empty.
            """
            population = list(available_moves)
            cumulative_weights = []
            running_total = 0
            for move in population:
                running_total = running_total + move.chemical_score
                cumulative_weights.append(running_total)
            if running_total == 0:
                # random.choices rejects a zero total weight with ValueError
                # (Python >= 3.9), so draw uniformly instead. With no moves at
                # all, random.choice raises IndexError, which select_best_move
                # turns into None.
                return random.choice(population)
            return random.choices(population, cum_weights=cumulative_weights, k=1)[0]
        return select_best_inside
class Rollout_policy_random_uniform_on_bio_score(Rollout_policy):
    """
    Rollout policy sampling a move at random, weighted by biological score.

    Each move is drawn with a probability proportional to its
    ``biological_score``. When the scores sum to zero there is nothing to
    weight by, so the draw falls back to a uniform one.
    """

    def __init__(self):
        description = "Random selection - uniform sampling from biological weights"
        super().__init__(policy_type = "Biological uniform sampling", description = description)
        self.policy = self.policy()
        self.name = "Rollout_policy_random_uniform_on_bio_score"

    def policy(self):
        """Build and return the biological-score weighted sampling function."""
        def select_best_inside(available_moves):
            """
            Return one move sampled with biological-score weights.

            Raises IndexError when ``available_moves`` is empty.
            """
            population = list(available_moves)
            cumulative_weights = []
            running_total = 0
            for move in population:
                running_total = running_total + move.biological_score
                cumulative_weights.append(running_total)
            if running_total == 0:
                # random.choices rejects a zero total weight with ValueError
                # (Python >= 3.9), so draw uniformly instead. With no moves at
                # all, random.choice raises IndexError, which select_best_move
                # turns into None.
                return random.choice(population)
            return random.choices(population, cum_weights=cumulative_weights, k=1)[0]
        return select_best_inside
class Rollout_policy_random_uniform_on_biochemical_addition_score(Rollout_policy):
    """
    Rollout policy sampling a move at random, weighted by the summed scores.

    Each move is drawn with a probability proportional to
    ``biological_score + chemical_score``. When those weights sum to zero
    there is nothing to weight by, so the draw falls back to a uniform one.
    """

    def __init__(self):
        description = "Random selection - uniform sampling from added biochemical weights"
        super().__init__(policy_type = "Biochemical addition uniform sampling", description = description)
        self.policy = self.policy()
        self.name = "Rollout_policy_random_uniform_on_biochemical_addition_score"

    def policy(self):
        """Build and return the summed-score weighted sampling function."""
        def select_best_inside(available_moves):
            """
            Return one move sampled with (biological + chemical) weights.

            Raises IndexError when ``available_moves`` is empty.
            """
            population = list(available_moves)
            cumulative_weights = []
            running_total = 0
            for move in population:
                running_total = running_total + move.biological_score + move.chemical_score
                cumulative_weights.append(running_total)
            if running_total == 0:
                # random.choices rejects a zero total weight with ValueError
                # (Python >= 3.9), so draw uniformly instead. With no moves at
                # all, random.choice raises IndexError, which select_best_move
                # turns into None.
                return random.choice(population)
            return random.choices(population, cum_weights=cumulative_weights, k=1)[0]
        return select_best_inside
class Rollout_policy_random_uniform_on_biochemical_multiplication_score(Rollout_policy):
    """
    Rollout policy sampling a move at random, weighted by the multiplied scores.

    Each move is drawn with a probability proportional to
    ``biological_score * chemical_score``. When those weights sum to zero
    there is nothing to weight by, so the draw falls back to a uniform one.
    """

    def __init__(self):
        description = "Random selection - uniform sampling from multiplied biochemical weights"
        super().__init__(policy_type = "Biochemical uniform sampling", description = description)
        self.policy = self.policy()
        self.name = "Rollout_policy_random_uniform_on_biochemical_multiplication_score"

    def policy(self):
        """Build and return the multiplied-score weighted sampling function."""
        def select_best_inside(available_moves):
            """
            Return one move sampled with (biological * chemical) weights.

            Raises IndexError when ``available_moves`` is empty.
            """
            population = list(available_moves)
            cumulative_weights = []
            running_total = 0
            for move in population:
                running_total = running_total + move.biological_score * move.chemical_score
                cumulative_weights.append(running_total)
            if running_total == 0:
                # random.choices rejects a zero total weight with ValueError
                # (Python >= 3.9), so draw uniformly instead. With no moves at
                # all, random.choice raises IndexError, which select_best_move
                # turns into None.
                return random.choice(population)
            return random.choices(population, cum_weights=cumulative_weights, k=1)[0]
        return select_best_inside