run.py
from TemplateMatcher import TemplateMatcher
from WindowGrabber import WindowGrabber
from Player import Player
from GameCache import GameCache
import cv2
import time
import numpy as np
import math
import sys
import argparse
# read arguments
parser = argparse.ArgumentParser()
group = parser.add_argument_group('rewards')
group.add_argument('--rewardlive', type=float, help='Reward for every frame the agent is alive. Default: 1', default=1)
group.add_argument('--rewarddead', type=float, help='Reward for every frame the agent is dead. Default: -10', default=-10)
group = parser.add_argument_group('cache')
group.add_argument('--cacheprefix', type=str, help='Prefix for all cache files. Default: cache/', default='cache/')
group.add_argument('--cachemin', type=int, help='Minimum cache size before training starts. Default: 50000', default=50000)
group.add_argument('--cachemax', type=int, help='Maximum cache size before overwriting images. Default: 1000000', default=1000000)
group = parser.add_argument_group('neural network')
group.add_argument('--batchsize', type=int, help='Size of each batch for training the neural network. Default: 50', default=50)
group.add_argument('--imagesize', type=int, nargs=2, help='Height and width of the image. Default: 128 256', default=[128, 256])
group.add_argument('--imagecount', type=int, help='Number of images for each prediction by the network. Default: 4', default=4)
group.add_argument('--updatebatches', type=int, help='Number of training batches to do before updating the target weights. Default: 10000', default=10000)
group.add_argument('-k', '--kernel', dest='kernels', metavar=('height', 'width'), type=int, nargs=2, action='append'
, help='Add a convolution kernel. Can be used multiple times.')
group.add_argument('-s', '--stride', dest='strides', metavar=('height', 'width'), type=int, nargs=2, action='append'
, help='Specify the stride (height, width) for each convolution. Can be used multiple times. Must be used the same number of times as --kernel.')
group.add_argument('-c', '--channels', dest='channels', metavar='channels', type=int, action='append'
, help='Specify the number of channels for each convolution. Can be used multiple times. Must be used the same number of times as --kernel.')
group.add_argument('-d', '--dense', dest='denses', metavar='neurons', type=int, action='append'
, help='Add a dense layer after the convolution layers with the specified number of neurons. Can be used multiple times.')
group.add_argument('--savedir', type=str, help='Directory to save weights and training metrics. Default: model/', default='model/')
group = parser.add_argument_group('machine learning')
group.add_argument('--learningrate', type=float, help='Learning rate. Default: 1e-4', default=1e-4)
group.add_argument('-g', '--gamma', type=float, help='Discount rate gamma. Default: 0.99', default=0.99)
# parse arguments
args = parser.parse_args()
print(args)
assert args.kernels, 'Must specify at least one --kernel'
assert len(args.kernels) == len(args.strides or []), 'Must specify a --stride for each --kernel'
assert len(args.kernels) == len(args.channels or []), 'Must specify a --channels for each --kernel'
# constants
LIVE_REWARD = args.rewardlive
DEAD_REWARD = args.rewarddead
MIN_CACHE = args.cachemin # only train if we have this many states
MAX_CACHE = args.cachemax
GAMMA = args.gamma
BATCH_SIZE = args.batchsize
EPSILON_RANGE = (1.0, 0.1)
EPSILON_ITERATION_END = 1e6
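# epsilon is annealed linearly from EPSILON_RANGE[0] down to EPSILON_RANGE[1]
# over the first EPSILON_ITERATION_END training iterations (see calc_epsilon below)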
UPDATE_TARGET_ITERATIONS = args.updatebatches
IMAGE_SHAPE = args.imagesize # (height, width)
IMAGES_BCK = args.imagecount - 1 # DO NOT CHANGE
IMAGES_FWD = 1 # DO NOT CHANGE
IMAGES_PER_STATE = 1 + IMAGES_BCK + IMAGES_FWD # DO NOT CHANGE
STATE_SHAPE = IMAGE_SHAPE + [IMAGES_PER_STATE] # DO NOT CHANGE
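# three discrete actions; judging by the value labels drawn on the display
# image further down, index 0 = left, 1 = stop and 2 = right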
NUM_ACTIONS = 3
# create finite state machine variable
fsm = 'restarting'
# create array to store 'state' of game, which is a sequence of images in the shape (height, width, time_steps + 1)
state = np.zeros(STATE_SHAPE)
state_previous = np.zeros(STATE_SHAPE)
# create information for the game
player = Player(
learning_rate = args.learningrate
, width = IMAGE_SHAPE[1]
, height = IMAGE_SHAPE[0]
, time_steps = args.imagecount
, actions = NUM_ACTIONS
, conv_kernels = args.kernels
, conv_strides = args.strides
, conv_channels = args.channels
, dense_channels = args.denses
, save_dir = args.savedir
)
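# game-over detection: Canny edge maps of each grabbed frame are template-matched
# against the edge map of res/gameover.png, with 0.5 as the match threshold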
gameover_matcher = TemplateMatcher(cv2.Canny(cv2.imread('res/gameover.png'), 50, 200), 0.5)
window_grabber = WindowGrabber()
game_cache = GameCache(args.cacheprefix, IMAGE_SHAPE, NUM_ACTIONS, MAX_CACHE)
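# the cache acts as replay memory: up to MAX_CACHE (frame, values, action,
# reward, terminal) entries, with the oldest overwritten once it is full;
# the cacheprefix path is presumably where GameCache.save() writes them to disk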
# restore player if we can
player.restore()
def calc_epsilon():
global player
iterations = player.net.global_step()
if iterations >= EPSILON_ITERATION_END:
epsilon = EPSILON_RANGE[1]
else:
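        # linear interpolation: EPSILON_RANGE[0] at iteration 0, falling to
        # EPSILON_RANGE[1] at EPSILON_ITERATION_END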
epsilon = ((EPSILON_ITERATION_END - iterations) * EPSILON_RANGE[0] + iterations * EPSILON_RANGE[1]) / EPSILON_ITERATION_END
return epsilon
player.policy = lambda state: player.epsilon_greedy_action(state, calc_epsilon())
# select window
print("Select window in 3 seconds")
time.sleep(3)
window_grabber.set_window_to_focus()
player.set_window_to_focus()
# create variable to keep track of what time_step in each game we are at
time_step = 0
# define necessary variables, just to be clear
image_state = None
image_state_previous = None
values = None
values_previous = None
action = None
action_previous = None
reward_previous = None
def train(print_bar=True, bar_length=20):
global player, game_cache
    gc = game_cache
if len(gc) == 0:
return
# randomly permute states and iterate
indices = np.random.permutation(len(gc))
batches = math.ceil(indices.shape[0] / BATCH_SIZE)
for batch_i in range(0, batches):
# update target weights every so many iterations
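        # (the target network is presumably a periodically frozen copy of the
        # online network, as in standard DQN)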
if player.net.global_step() % UPDATE_TARGET_ITERATIONS == 0:
print('\nUpdated target function weights') # newline to print nicely with bar
player.update_target_function()
batch_indices = indices[batch_i * BATCH_SIZE : min(len(indices), (batch_i+1) * BATCH_SIZE)]
training_states = gc.state(batch_indices, IMAGES_BCK, IMAGES_FWD)
training_rewards = gc.reward(batch_indices)
training_actions = gc.action(batch_indices)
training_next_terminal = gc.terminal(batch_indices)
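        # one training step; assuming Player.learn fits the usual DQN target
        # r + GAMMA * max_a Q_target(s', a), with the max term dropped for terminal s'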
player.learn(training_states, training_actions, training_rewards, training_next_terminal, GAMMA)
# print progress bar
if print_bar:
if batches == 1:
fraction_done = 1
else:
fraction_done = batch_i / (batches - 1)
bar_segments = math.floor(bar_length * fraction_done)
            sys.stdout.write(('\r[%-' + str(bar_length) + 's] %d%%') % ('=' * bar_segments, fraction_done * 100))
            sys.stdout.flush()
    # print a newline once the progress bar is complete
if print_bar:
print()
    # log training summaries using whatever the final batch was
player.net.summarize(training_states, training_actions, training_rewards, training_next_terminal, GAMMA)
# run player
while True:
try:
# grab the frame of the game
image_raw = window_grabber.grab()
# check if the game is over
image_canny = cv2.Canny(image_raw, 50, 200)
gameover = gameover_matcher.isMatch(image_canny)
# create image for adding to the state
image_state = cv2.cvtColor(cv2.resize(image_raw, (state.shape[1], state.shape[0])), cv2.COLOR_RGBA2GRAY)
# create display image
image_display = cv2.cvtColor(cv2.resize(image_state, (image_raw.shape[1], image_raw.shape[0]), interpolation=cv2.INTER_NEAREST), cv2.COLOR_GRAY2BGR)
# add previous image to the game cache
if time_step != 0:
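            # the reward for frame t is only known once frame t+1 has been grabbed
            # (it reveals whether the game ended), hence the one-frame delay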
# calculate the reward for the previous image and the action taken
reward_previous = DEAD_REWARD if gameover else LIVE_REWARD
# push previous state
game_cache.push(image_state_previous, values_previous, action_previous, reward_previous, gameover)
# update finite state machine
if fsm == 'playing':
if gameover:
print('Game over with cache of size ' + str(len(game_cache)))
fsm = 'gameover'
elif fsm == 'restarting':
if not gameover:
print('New game started')
fsm = 'playing'
        # evaluate the finite state machine (and update it where needed)
if fsm == 'restarting':
# restart the game
player.gc.restart()
elif fsm == 'gameover':
fsm = 'restarting'
# write game over on display image
cv2.putText(image_display, "Game Over", (0, image_display.shape[0]), cv2.FONT_HERSHEY_PLAIN, 1, (0,0,255))
cv2.waitKey(1)
if len(game_cache) >= MIN_CACHE:
# train the player
                print('Training on ' + str(len(game_cache)) + ' states')
                train()
                print('Training finished for step ' + str(player.net.global_step()))
time_step = 0
elif fsm == 'playing':
            # if the game is not over, add the new image to the state
if time_step == 0:
# repeat the image for each time_step
state = np.repeat(image_state[...,None], state.shape[-1], axis=-1)
else:
# roll state over, and add new image to the end
state = np.roll(state, -1, axis=-1)
state[...,-1] = image_state
# get player's action values
values = player.value(state)
# get player to take action
action = player.policy_action(state)
player.act(action)
# write values on display image
cv2.putText(image_display, "Epsilon: " + str(calc_epsilon()), (0, image_display.shape[0] - 48), cv2.FONT_HERSHEY_PLAIN, 1, (0,255,0))
cv2.putText(image_display, "Left: " + str(values[0]) , (0, image_display.shape[0] - 32), cv2.FONT_HERSHEY_PLAIN, 1, (0,255,0))
cv2.putText(image_display, "Stop: " + str(values[1]) , (0, image_display.shape[0] - 16), cv2.FONT_HERSHEY_PLAIN, 1, (0,255,0))
cv2.putText(image_display, "Right: " + str(values[2]) , (0, image_display.shape[0] ), cv2.FONT_HERSHEY_PLAIN, 1, (0,255,0))
time_step += 1
image_state_previous = image_state
action_previous = action
values_previous = values
state_previous = state
cv2.imshow("Player", image_display)
cv2.waitKey(1)
except KeyboardInterrupt:
print('Saving player')
player.save()
print('Saving cache')
game_cache.save()
break