-
Notifications
You must be signed in to change notification settings - Fork 0
/
map_commented.py
279 lines (236 loc) · 15.4 KB
/
map_commented.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
# Self Driving Car
# RUN TENSORBOARD
# tensorboard --logdir runs --port 6006
# UPDATE TENSORBOARD FASTER
# setInterval(function() {document.getElementById('reload-button').click()}, 3000);
# Importing the libraries
import numpy as np
from random import random, randint
import matplotlib.pyplot as plt
import time
import os
# Importing the Kivy packages
from kivy.app import App
from kivy.uix.widget import Widget
from kivy.uix.button import Button
from kivy.graphics import Color, Ellipse, Line
from kivy.config import Config
from kivy.properties import NumericProperty, ReferenceListProperty, ObjectProperty
from kivy.vector import Vector
from kivy.clock import Clock
# Importing the Dqn object from our AI in ai.py
from ai import Dqn
from tensorboardX import SummaryWriter
# Switch the mouse provider to "multitouch_on_demand" so that a right click
# does not leave a red multitouch marker on the window.
Config.set('input', 'mouse', 'mouse,multitouch_on_demand')

# Sand-drawing bookkeeping shared with MyPaintWidget:
#   last_x / last_y : previous brush position of the stroke being drawn
#   n_points        : number of points added to the current stroke
#   length          : accumulated length of the current stroke
last_x, last_y = 0, 0
n_points, length = 0, 0

# Wall-clock reference; Game.update measures elapsed time against it and
# resets it every time the car reaches a goal.
start = time.time()

# The AI ("brain"). The first argument (10) matches the 10-element state
# vector built in Game.update and the second (3) matches the three entries of
# action2rotation; the third is presumably the discount factor gamma -- confirm
# against ai.Dqn.
brain = Dqn(10, 3, 0.9)

# Rotation in degrees applied for each action index returned by the brain:
# 0 -> straight on, 1 -> +20, 2 -> -20.
action2rotation = [0, 20, -20]

# Reward handed to the brain on the next update.
last_reward = 0

# History of brain.score() values, one per frame; plotted by the "save" button.
scores = []

# True until init() has run once (Game.update checks it on every frame).
first_update = True
def init():
    """Create the sand grid and place the first goal.

    Called by ``Game.update`` while ``first_update`` is still true, i.e. once,
    after the module globals ``longueur`` (map width) and ``largeur`` (map
    height) have been set from the widget size.

    Side effects (module globals):
        sand: loaded from ``sand.txt`` when that file exists in the working
            directory, otherwise a zero-filled array with one cell per pixel
            (1 marks sand, 0 marks free road).
        goal_x, goal_y: set to ``(20, largeur - 20)``; kept 20 px off the
            walls because touching a wall is penalised.
        first_update: cleared so this function is not called again.
    """
    global sand
    global goal_x
    global goal_y
    global first_update

    if os.path.exists('sand.txt'):
        print("YES")
        # NOTE(review): the saved grid is used as-is; nothing checks that its
        # shape matches the current window size.
        sand = np.loadtxt('sand.txt', dtype=int)
    else:
        # Widget sizes can be floats, but NumPy only accepts integer
        # dimensions, so cast before allocating.
        sand = np.zeros((int(longueur), int(largeur)))

    goal_x = 20
    goal_y = largeur - 20
    first_update = False
# TensorBoard event writer; Game.update logs its per-frame scalars through it
# (the header of this file suggests viewing them with `--logdir runs`).
writer = SummaryWriter()

# Distance between the car and the goal on the previous frame (none measured yet).
last_distance = 0
# The car widget (for "NumericProperty" and "ReferenceListProperty", see the
# kivy tutorials: https://kivy.org/docs/tutorials/pong.html)
class Car(Widget):
    """The car: position, heading, velocity and three sand sensors.

    Each sensor sits 30 px away from the car: sensor 1 straight along the
    heading, sensor 2 rotated +30 degrees and sensor 3 rotated -30 degrees.
    Every sensor reports a signal equal to the sand density in the 20x20
    patch around it, or 1.0 when it lies within 10 px of the map border.
    """

    # Heading: angle between the x-axis of the map and the axis of the car.
    angle = NumericProperty(0)
    # Rotation applied by the most recent move() call (0, 20 or -20 degrees).
    rotation = NumericProperty(0)

    # Velocity vector and its two components.
    velocity_x = NumericProperty(0)
    velocity_y = NumericProperty(0)
    velocity = ReferenceListProperty(velocity_x, velocity_y)

    # Sensor 1 position (looks straight ahead).
    sensor1_x = NumericProperty(0)
    sensor1_y = NumericProperty(0)
    sensor1 = ReferenceListProperty(sensor1_x, sensor1_y)

    # Sensor 2 position (heading + 30 degrees).
    sensor2_x = NumericProperty(0)
    sensor2_y = NumericProperty(0)
    sensor2 = ReferenceListProperty(sensor2_x, sensor2_y)

    # Sensor 3 position (heading - 30 degrees).
    sensor3_x = NumericProperty(0)
    sensor3_y = NumericProperty(0)
    sensor3 = ReferenceListProperty(sensor3_x, sensor3_y)

    # Signals read by sensors 1, 2 and 3.
    signal1 = NumericProperty(0)
    signal2 = NumericProperty(0)
    signal3 = NumericProperty(0)

    @staticmethod
    def _sand_density(x, y):
        """Return the fraction of sand cells in the 20x20 patch centred on (x, y)."""
        col, row = int(x), int(y)
        patch = sand[col - 10:col + 10, row - 10:row + 10]
        return int(np.sum(patch)) / 400.

    @staticmethod
    def _near_edge(x, y):
        """Return True when (x, y) is within 10 px of, or beyond, a map border."""
        return x > longueur - 10 or x < 10 or y > largeur - 10 or y < 10

    def move(self, rotation):
        """Advance the car one step, turn it by ``rotation`` and refresh the sensors.

        Args:
            rotation: angle in degrees added to the current heading.
        """
        # Step along the current velocity, then turn.
        self.pos = Vector(*self.velocity) + self.pos
        self.rotation = rotation
        self.angle = self.angle + self.rotation

        # Re-position the three sensors relative to the new pose.
        self.sensor1 = Vector(30, 0).rotate(self.angle) + self.pos
        self.sensor2 = Vector(30, 0).rotate((self.angle + 30) % 360) + self.pos
        self.sensor3 = Vector(30, 0).rotate((self.angle - 30) % 360) + self.pos

        # Sand density seen by each sensor.
        self.signal1 = self._sand_density(self.sensor1_x, self.sensor1_y)
        self.signal2 = self._sand_density(self.sensor2_x, self.sensor2_y)
        self.signal3 = self._sand_density(self.sensor3_x, self.sensor3_y)

        # A sensor at the border of the map reports full sand, so walls are
        # perceived like obstacles.
        if self._near_edge(self.sensor1_x, self.sensor1_y):
            self.signal1 = 1.
        if self._near_edge(self.sensor2_x, self.sensor2_y):
            self.signal2 = 1.
        if self._near_edge(self.sensor3_x, self.sensor3_y):
            self.signal3 = 1.
class Ball1(Widget):  # see kivy tutorials: https://kivy.org/docs/tutorials/pong.html
    """Marker widget for sensor 1; Game.update moves it to the car's sensor1 position."""
class Ball2(Widget):  # see kivy tutorials: https://kivy.org/docs/tutorials/pong.html
    """Marker widget for sensor 2; Game.update moves it to the car's sensor2 position."""
class Ball3(Widget):  # see kivy tutorials: https://kivy.org/docs/tutorials/pong.html
    """Marker widget for sensor 3; Game.update moves it to the car's sensor3 position."""
# Creating the game class (to understand "ObjectProperty", see kivy tutorials: https://kivy.org/docs/tutorials/pong.html)
class Game(Widget):
    """Root widget: owns the car and its sensor markers and runs the RL loop.

    ``update`` is scheduled on the Kivy clock by ``CarApp.build``. On every
    frame it builds the state vector, asks the brain for an action, moves the
    car, computes the reward for the next frame and logs scalars to
    TensorBoard.
    """

    # Per the original comments these are supplied by the project's kivy file.
    car = ObjectProperty(None)    # the Car widget
    ball1 = ObjectProperty(None)  # marker for sensor 1
    ball2 = ObjectProperty(None)  # marker for sensor 2
    ball3 = ObjectProperty(None)  # marker for sensor 3

    # Number of frames logged so far; passed to TensorBoard as global_step so
    # that successive points are not all written at the same step.
    _log_step = 0

    @staticmethod
    def _inverse(value, floor=1e-6):
        """Return ``1 / value`` with ``value`` floored at ``floor``.

        Identical to a plain reciprocal for values >= ``floor``; prevents a
        ZeroDivisionError when the value is exactly zero (car aligned with
        the goal on one axis, or the timer just reset).
        """
        return 1 / max(value, floor)

    def serve_car(self):
        """Place the car at the centre of the map, moving right at speed 2."""
        self.car.center = self.center
        self.car.velocity = Vector(2, 0)

    def update(self, dt):
        """Advance the simulation by one frame.

        Args:
            dt: time since the previous call, as passed by the Kivy clock
                (not used).

        Reward assigned for the next frame:
            -0.01  per frame on free road (speed 6)
            -3     on sand (speed drops to 1) or when clamped at a border
            +20    added when the car gets within 100 px of the goal
            -0.01 * elapsed seconds, added once more than 25 s have passed
                   since the timer was last reset
        """
        global brain          # the AI
        global last_reward    # reward fed to the brain on the next frame
        global scores         # history of brain.score()
        global last_distance  # distance to the goal on the previous frame
        global goal_x         # x-coordinate of the current goal
        global goal_y         # y-coordinate of the current goal
        global longueur       # width of the map
        global largeur        # height of the map
        global start          # wall-clock time of the last goal reset

        longueur = self.width
        largeur = self.height
        if first_update:  # the map is initialised only once
            init()

        # Seconds elapsed since the timer was last reset.
        current_time = time.time() - start

        # Vector from the car to the goal.
        xx = goal_x - self.car.x
        yy = goal_y - self.car.y
        # Angle between the car's velocity and the direction of the goal,
        # divided by 180 (0 when heading straight at the goal).
        orientation = Vector(*self.car.velocity).angle((xx, yy)) / 180.

        # 10-element state vector: three sensor signals, +/- orientation, the
        # goal coordinates, inverse per-axis distances and inverse elapsed time.
        last_signal = [
            self.car.signal1,
            self.car.signal2,
            self.car.signal3,
            orientation,
            -orientation,
            goal_x,
            goal_y,
            self._inverse(abs(xx)),
            self._inverse(abs(yy)),
            self._inverse(current_time),
        ]

        # Learn from the previous reward and pick the next action.
        action = brain.update(last_reward, last_signal)
        scores.append(brain.score())
        rotation = action2rotation[action]  # action index -> rotation in degrees
        self.car.move(rotation)

        # Distance to the goal right after the move.
        distance = np.sqrt((self.car.x - goal_x)**2 + (self.car.y - goal_y)**2)

        # Keep the on-screen sensor markers on the car's sensors.
        self.ball1.pos = self.car.sensor1
        self.ball2.pos = self.car.sensor2
        self.ball3.pos = self.car.sensor3

        # Default case: normal speed and a small per-frame penalty.
        self.car.velocity = Vector(6, 0).rotate(self.car.angle)
        last_reward = -0.01
        # (A "+0.1 when distance < last_distance" bonus existed here and is
        # currently disabled; last_distance is still tracked below.)

        # NOTE(review): this lookup assumes the car position lies inside the
        # sand array -- confirm for resized windows or a sand.txt of another size.
        if sand[int(self.car.x), int(self.car.y)] > 0:
            # On sand: slowed down and penalised.
            self.car.velocity = Vector(1, 0).rotate(self.car.angle)
            last_reward = -3

        # Keep the car at least 10 px inside every border; being clamped is penalised.
        if self.car.x < 10:
            self.car.x = 10
            last_reward = -3
        if self.car.x > self.width - 10:
            self.car.x = self.width - 10
            last_reward = -3
        if self.car.y < 10:
            self.car.y = 10
            last_reward = -3
        if self.car.y > self.height - 10:
            self.car.y = self.height - 10
            last_reward = -3

        if distance < 100:
            # Goal reached: mirror the goal to the diagonally opposite corner,
            # grant the bonus and restart the timer.
            goal_x = self.width - goal_x
            goal_y = self.height - goal_y
            last_reward += 20
            start = time.time()

        if current_time > 25:
            # Growing penalty for taking too long to reach the goal.
            last_reward += -current_time * 0.01

        # Remember the distance for the next frame.
        last_distance = distance

        # Log this frame with an explicit, monotonically increasing step.
        step = self._log_step
        writer.add_scalar('data/current_time', current_time, step)
        writer.add_scalar('data/distance', distance, step)
        writer.add_scalar('data/last_reward', last_reward, step)
        self._log_step = step + 1
# Painting for graphic interface (see kivy tutorials: https://kivy.org/docs/tutorials/firstwidget.html)
class MyPaintWidget(Widget):
    """Overlay widget that lets the user paint sand onto the map.

    Every stroke is drawn on this widget's canvas and mirrored into the
    module-level ``sand`` array (1 = sand).
    """

    def on_touch_down(self, touch):
        """Start a new stroke at the touch position and mark that cell as sand.

        Runs for every touch; the button is not checked here.
        """
        global length, n_points, last_x, last_y
        with self.canvas:
            Color(0.8, 0.7, 0)
            touch.ud['line'] = Line(points=(touch.x, touch.y), width=10)
        # Reset the per-stroke statistics used to vary the brush width.
        last_x = int(touch.x)
        last_y = int(touch.y)
        n_points = 0
        length = 0
        sand[int(touch.x), int(touch.y)] = 1

    def on_touch_move(self, touch):
        """Extend the current stroke while the left button is held down."""
        global length, n_points, last_x, last_y
        # Only mouse events carry a `button` attribute; other touches are ignored
        # instead of raising AttributeError.
        if getattr(touch, 'button', None) != 'left':
            return

        touch.ud['line'].points += [touch.x, touch.y]
        x = int(touch.x)
        y = int(touch.y)

        # Points per unit of stroke length drive the drawn line width. Each
        # segment counts for at least sqrt(2), so `length` is never zero here.
        length += np.sqrt(max((x - last_x)**2 + (y - last_y)**2, 2))
        n_points += 1.
        density = n_points / length
        touch.ud['line'].width = int(20 * density + 1)

        # Fill a 20x20 patch of sand around the pointer. The slice starts are
        # clamped at 0: a negative start would wrap around to the far side of
        # the array and select nothing near the left/bottom edges.
        sand[max(x - 10, 0):x + 10, max(y - 10, 0):y + 10] = 1

        last_x = x
        last_y = y
# API and switches interface (see kivy tutorials: https://kivy.org/docs/tutorials/pong.html)
class CarApp(App):
    """Kivy application: builds the game, the paint overlay and the control buttons."""

    def build(self):
        """Create the root Game widget, schedule its update loop and add the UI.

        Returns:
            The Game instance used as the root widget.
        """
        parent = Game()
        parent.serve_car()
        Clock.schedule_interval(parent.update, 1.0 / 60.0)  # 60 updates per second

        self.painter = MyPaintWidget()
        parent.add_widget(self.painter)

        # Buttons are laid out left to right, one `parent.width` apart, using
        # the widget's width at build time.
        buttons = (
            ('clear', self.clear_canvas),
            ('save', self.save),
            ('load', self.load),
            ('save sand', self.save_sand),
            ('load sand', self.load_sand),
        )
        for slot, (label, handler) in enumerate(buttons):
            button = Button(text=label, pos=(slot * parent.width, 0))
            button.bind(on_release=handler)
            parent.add_widget(button)
        return parent

    def clear_canvas(self, obj):
        """'clear' button: erase the drawn strokes and reset the sand grid to zeros."""
        global sand
        self.painter.canvas.clear()
        # Widget sizes can be floats; NumPy needs integer dimensions.
        sand = np.zeros((int(longueur), int(largeur)))

    def save(self, obj):
        """'save' button: save the brain, then plot the score history."""
        print("saving brain...")
        brain.save()
        plt.plot(scores)
        plt.show()

    def load(self, obj):
        """'load' button: restore the last saved brain."""
        print("loading last saved brain...")
        brain.load()

    def save_sand(self, obj):
        """'save sand' button: write the sand grid to sand.txt as integers."""
        np.savetxt('sand.txt', sand, fmt='%d')

    def load_sand(self, obj):
        """'load sand' button: replace the sand grid with the contents of sand.txt.

        Only the array is replaced; the canvas is not redrawn here, so the
        loaded sand is not shown on screen by this method.
        """
        global sand
        print("loading sand...")
        if not os.path.exists('sand.txt'):
            # Nothing saved yet: keep the current grid instead of raising
            # from inside the button callback.
            print("sand.txt not found, nothing loaded")
            return
        sand = np.loadtxt('sand.txt', dtype=int)
# Entry point: build the application and start the Kivy main loop.
if __name__ == '__main__':
    app = CarApp()
    app.run()