This repository has been archived by the owner on Jul 22, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 21
/
Copy pathtest_agent.py
305 lines (262 loc) · 14.9 KB
/
test_agent.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
import argparse
import numpy as np
import random
from time import time
import torch
import pickle
import agent
from utils import extractor
from utils.generic import getUniqueFileHandler
from utils.kg import construct_kg, load_manual_graphs, RelationExtractor
from utils.textworld_utils import get_goal_graph
from utils.nlp import Tokenizer
from games import dataset
# Compute device shared by the script: CUDA when torch reports it as available, CPU otherwise.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
def play(agent, opt, random_action=False):
    """Play every game for ``opt.nepisodes`` episodes and collect per-game statistics.

    The games are loaded through ``dataset.get_game_env`` from a path built out of
    ``opt.game_dir``, ``opt.difficulty_level``, ``opt.mode`` and (optionally)
    ``opt.game_name``.  Each episode resets the environment batch by batch until
    all games have been played, lets the agent act for at most
    ``opt.max_step_per_episode`` steps, and records moves and scores.

    Args:
        agent: Agent object.  This function reads ``infos_to_request``,
            ``graph_emb_type`` and ``graph_type`` from it, may assign
            ``kg_graph``, and calls ``start_episode``, ``reset_parameters``,
            ``update_current_graph``, ``act`` and ``end_episode``.
        opt: Options namespace.  Fields read here: ``game_dir``,
            ``difficulty_level``, ``mode``, ``game_name``, ``graph_emb_type``,
            ``graph_type``, ``graph_mode``, ``batch_size``, ``nepisodes``,
            ``max_step_per_episode``, ``verbose`` and ``results_filename``.
        random_action: Forwarded to ``agent.act``.  When False, the Python,
            NumPy, torch and environment RNGs are re-seeded with the episode
            index at the start of every episode.

    Returns:
        Tuple ``(avg_scores, avg_norm_scores, avg_moves)`` of NumPy arrays with
        one row per episode and one entry per played game.
    """
    # When enabled, "examine"/"look" commands are stripped from the admissible
    # commands returned by env.reset().
    filter_examine_cmd = False
    infos_to_request = agent.infos_to_request
    infos_to_request.max_score = True  # Needed to normalize the scores.

    # Fix: without a difficulty level the path is simply <game_dir>/<mode>;
    # game_dir must not be prepended a second time.
    if opt.difficulty_level != '':
        game_path = opt.game_dir + "/" + str(opt.difficulty_level) + "/" + opt.mode
    else:
        game_path = opt.game_dir + "/" + opt.mode

    manual_world_graphs = {}
    if opt.graph_emb_type and 'world' in opt.graph_type:
        print("Loading Knowledge Graph ... ", end='')
        agent.kg_graph, _, _ = construct_kg(game_path + '/conceptnet_subgraph.txt')
        print(' DONE')
        # optional: Use complete or brief manually extracted conceptnet subgraph for the agent
        print("Loading Manual World Graphs ... ", end='')
        manual_world_graphs = load_manual_graphs(game_path + '/manual_subgraph_brief')

    if opt.game_name:
        game_path = game_path + "/" + opt.game_name

    env, game_file_names = dataset.get_game_env(game_path, infos_to_request, opt.max_step_per_episode,
                                                opt.batch_size, opt.mode, opt.verbose)

    # Get goals as graphs, keyed by the id embedded in the game file name
    # (text after the last '-' and before the first '.').
    goal_graphs = {}
    for game_file in env.gamefiles:
        goal_graph = get_goal_graph(game_file)
        if goal_graph:
            game_id = game_file.split('-')[-1].split('.')[0]
            goal_graphs[game_id] = goal_graph

    # Collect some statistics: nb_steps, final reward.
    total_games_count = len(game_file_names)
    game_identifiers, avg_moves, avg_scores, avg_norm_scores, max_poss_scores = [], [], [], [], []

    for no_episode in range(opt.nepisodes):
        if not random_action:
            # Re-seed everything with the episode index so runs are repeatable.
            random.seed(no_episode)
            np.random.seed(no_episode)
            torch.manual_seed(no_episode)
            if torch.cuda.is_available():
                torch.cuda.manual_seed(no_episode)
            env.seed(no_episode)

        agent.start_episode(opt.batch_size)
        avg_eps_moves, avg_eps_scores, avg_eps_norm_scores = [], [], []
        num_games = total_games_count
        game_max_scores = []
        game_names = []

        while num_games > 0:
            obs, infos = env.reset()  # Start new episode.
            if filter_examine_cmd:
                for commands_ in infos["admissible_commands"]:  # [open refri, take apple from refrigeration]
                    # Collect first, then remove, so the list is not mutated while scanned.
                    for cmd_ in [cmd for cmd in commands_ if cmd.split()[0] in ["examine", "look"]]:
                        commands_.remove(cmd_)

            batch_size = len(obs)
            num_games -= batch_size
            game_goal_graphs = [None] * batch_size
            max_scores = []
            game_ids = []
            game_manual_world_graph = [None] * batch_size
            for b, game in enumerate(infos["game"]):
                max_scores.append(game.max_score)
                if "uuid" in game.metadata:
                    game_id = game.metadata["uuid"].split("-")[-1]
                    game_ids.append(game_id)
                    game_names.append(game_id)
                    game_max_scores.append(game.max_score)
                    if goal_graphs:
                        game_goal_graphs[b] = goal_graphs[game_id]
                    if manual_world_graphs:
                        game_manual_world_graph[b] = manual_world_graphs[game_id]

            if not game_ids:
                # No game carried a uuid: fall back to numeric ids derived from
                # the count of games still left to play.
                game_ids = range(num_games, num_games + batch_size)
                game_names.extend(game_ids)

            commands = ["restart"] * batch_size
            scored_commands = [[] for _ in range(batch_size)]
            last_scores = [0.0] * batch_size
            scores = [0.0] * batch_size
            dones = [False] * batch_size
            nb_moves = [0] * batch_size
            infos["goal_graph"] = game_goal_graphs
            infos["manual_world_graph"] = game_manual_world_graph
            agent.reset_parameters(opt.batch_size)

            for step_no in range(opt.max_step_per_episode):
                # Only games that are not finished yet accumulate a move.
                nb_moves = [step + int(not done) for step, done in zip(nb_moves, dones)]

                if agent.graph_emb_type and ('local' in agent.graph_type or 'world' in agent.graph_type):
                    agent.update_current_graph(obs, commands, scored_commands, infos, opt.graph_mode)

                commands = agent.act(obs, scores, dones, infos, scored_commands, random_action)
                obs, scores, dones, infos = env.step(commands)
                # env.step returns a fresh infos dict, so the graphs are re-attached.
                infos["goal_graph"] = game_goal_graphs
                infos["manual_world_graph"] = game_manual_world_graph

                # Remember every command that increased a game's score.
                for b in range(batch_size):
                    if scores[b] - last_scores[b] > 0:
                        last_scores[b] = scores[b]
                        scored_commands[b].append(commands[b])

                if all(dones):
                    break
                if step_no == opt.max_step_per_episode - 1:
                    # Step budget exhausted: mark every game as finished.
                    dones = [True] * len(dones)

            agent.act(obs, scores, dones, infos, scored_commands, random_action)  # Let the agent know the game is done.

            if opt.verbose:
                print(".", end="")
            avg_eps_moves.extend(nb_moves)
            avg_eps_scores.extend(scores)
            avg_eps_norm_scores.extend([score / max_score for score, max_score in zip(scores, max_scores)])

        if opt.verbose:
            print("*", end="")
        agent.end_episode()
        game_identifiers.append(game_names)
        avg_moves.append(avg_eps_moves)  # episode x # games
        avg_scores.append(avg_eps_scores)
        avg_norm_scores.append(avg_eps_norm_scores)
        max_poss_scores.append(game_max_scores)

    env.close()

    game_identifiers = np.array(game_identifiers)
    avg_moves = np.array(avg_moves)
    avg_scores = np.array(avg_scores)
    avg_norm_scores = np.array(avg_norm_scores)
    max_poss_scores = np.array(max_poss_scores)

    if opt.verbose:
        # Sort each episode's games by identifier so the same game lands in the
        # same column before averaging across episodes.
        idx = np.apply_along_axis(np.argsort, axis=1, arr=game_identifiers)
        game_avg_moves = np.mean(np.array([row[order] for order, row in zip(idx, avg_moves)]), axis=0)
        game_norm_scores = np.mean(np.array([row[order] for order, row in zip(idx, avg_norm_scores)]), axis=0)
        game_avg_scores = np.mean(np.array([row[order] for order, row in zip(idx, avg_scores)]), axis=0)

        msg = "\nGame Stats:\n-----------\n" + "\n".join(
            " Game_#{} = Score: {:5.2f} Norm_Score: {:5.2f} Moves: {:5.2f}/{}".format(
                game_no, avg_score, norm_score, avg_move, opt.max_step_per_episode)
            for game_no, (norm_score, avg_score, avg_move) in
            enumerate(zip(game_norm_scores, game_avg_scores, game_avg_moves)))
        print(msg)

        total_avg_moves = np.mean(game_avg_moves)
        total_avg_scores = np.mean(game_avg_scores)
        total_norm_scores = np.mean(game_norm_scores)
        msg = opt.mode + " stats: avg. score: {:4.2f}; norm. avg. score: {:4.2f}; avg. steps: {:5.2f}; \n"
        print(msg.format(total_avg_scores, total_norm_scores, total_avg_moves))

        ## Dump log files ......
        # NOTE(review): results are only written in verbose mode; confirm this is intended.
        str_result = {opt.mode + 'game_ids': game_identifiers, opt.mode + 'max_scores': max_poss_scores,
                      opt.mode + 'scores_runs': avg_scores, opt.mode + 'norm_score_runs': avg_norm_scores,
                      opt.mode + 'moves_runs': avg_moves}

        # Fix: close the handle once the dump is done so the data is flushed and
        # the descriptor is released (assumes getUniqueFileHandler returns an
        # open file object -- TODO confirm).
        results_ofile = getUniqueFileHandler(opt.results_filename + '_' + opt.mode + '_results')
        try:
            pickle.dump(str_result, results_ofile)
        finally:
            results_ofile.close()

    return avg_scores, avg_norm_scores, avg_moves
if __name__ == '__main__':
    # Script entry point: parse the options, then for each of ``--nruns`` runs
    # build an agent, load the pretrained weights and evaluate it with play().
    random.seed(42)
    parser = argparse.ArgumentParser(add_help=False)

    # game files and other directories
    parser.add_argument('--game_dir', default='./games/twc', help='Location of the game e.g ./games/testbed')
    parser.add_argument('--game_name', help='Name of the game file e.g., kitchen_cleanup_10quest_1.ulx, *.ulx, *.z8')
    parser.add_argument('--results_dir', default='./results', help='Path to the results files')
    parser.add_argument('--logs_dir', default='./logs', help='Path to the logs files')
    parser.add_argument('--pretrained_model', required=True, help='Location of the pretrained command scorer model')

    # optional arguments (if game_name is given) for game files
    parser.add_argument('--batch_size', type=int, default=1, help='Number of the games per batch')
    parser.add_argument('--difficulty_level', default='easy', choices=['easy','medium', 'hard'],
                        help='difficulty level of the games')

    # Experiments
    parser.add_argument('--initial_seed', type=int, default=42)
    parser.add_argument('--nruns', type=int, default=5)
    parser.add_argument('--runid', type=int, default=0)
    parser.add_argument('--no_train_episodes', type=int, default=100)
    parser.add_argument('--no_eval_episodes', type=int, default=5)
    parser.add_argument('--train_max_step_per_episode', type=int, default=50)
    parser.add_argument('--eval_max_step_per_episode', type=int, default=50)
    # NOTE(review): store_true combined with default=True means this flag can
    # never be switched off from the command line.
    parser.add_argument('--verbose', action='store_true', default=True)

    parser.add_argument('--hidden_size', type=int, default=300, help='num of hidden units for embeddings')
    parser.add_argument('--hist_scmds_size', type=int, default=3,
                        help='Number of recent scored command history to use. Useful when the game has intermediate reward.')
    parser.add_argument('--ngram', type=int, default=3)
    parser.add_argument('--token_extractor', default='max', help='token extractor: (any or max)')
    parser.add_argument('--corenlp_url', default='http://localhost:9000/',
                        help='URL for Stanford CoreNLP OpenIE Server for the relation extraction for the local graph')

    parser.add_argument('--noun_only_tokens', action='store_true', default=False,
                        help=' Allow only noun for the token extractor')
    parser.add_argument('--use_stopword', action='store_true', default=False,
                        help=' Use stopwords for the token extractor')
    parser.add_argument('--agent_type', default='knowledgeaware', choices=['random','simple', 'knowledgeaware'],
                        help='Agent type for the text world: (random, simple, knowledgeable)')
    parser.add_argument('--graph_type', default='', choices=['', 'local','world'],
                        help='What type of graphs to be generated')
    parser.add_argument('--graph_mode', default='evolve', choices=['full', 'evolve'],
                        help='Give Full ground truth graph or evolving knowledge graph: (full, evolve)')
    parser.add_argument('--split', default='test', choices=['test', 'valid'],
                        help='Whether to run on test or valid')

    parser.add_argument('--local_evolve_type', default='direct', choices=['direct', 'ground'],
                        help='Type of the generated/evolving strategy for local graph')
    # NOTE(review): the default 'cdc' is lower-case while the listed choices are
    # upper-case; confirm which spelling the downstream code expects.
    parser.add_argument('--world_evolve_type', default='cdc',
                        choices=['DC','CDC', 'NG','NG+prune','manual'],
                        help='Type of the generated/evolving strategy for world graph')

    # Embeddings
    parser.add_argument('--emb_loc', default='embeddings/', help='Path to the embedding location')
    parser.add_argument('--word_emb_type', default='glove',
                        help='Embedding type for the observation and the actions: ...'
                             '(random, glove, numberbatch, fasttext). Use utils.generic.load_embedings ...'
                             ' to take car of the custom embedding locations')
    parser.add_argument('--graph_emb_type', help='Knowledge Graph Embedding type for actions: (numberbatch, complex)')
    parser.add_argument('--egreedy_epsilon', type=float, default=0.0, help="Epsilon for the e-greedy exploration")

    opt = parser.parse_args()
    print(opt)

    # Seed every RNG for reproducibility.
    random.seed(opt.initial_seed)
    np.random.seed(opt.initial_seed)
    torch.manual_seed(opt.initial_seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(opt.initial_seed)
        torch.backends.cudnn.deterministic = True

    # Per-run results returned by play(), one entry per run.
    test_scores_runs = []
    test_norm_score_runs = []
    test_moves_runs = []

    # The random agent ignores the model output; the simple agent uses no graphs.
    random_action = False
    if opt.agent_type == 'random':
        random_action = True
        opt.graph_emb_type = None
    if opt.agent_type == 'simple':
        opt.graph_type = ''
        opt.graph_emb_type = None

    tk_extractor = extractor.get_extractor(opt.token_extractor)

    # Encode the experiment configuration into the results file name.
    name_parts = [
        opt.results_dir + '/' + opt.agent_type + '_' + opt.game_dir.split('/')[-1] + '_',
        opt.graph_mode + '_' + opt.graph_type + '_' if opt.graph_type else '',
        str(opt.word_emb_type) + '_' if opt.word_emb_type else '',
        str(opt.graph_emb_type) + '-' if opt.graph_emb_type else '',
        str(opt.nruns) + 'runs_',
        str(opt.no_train_episodes) + 'episodes_',
        str(opt.hist_scmds_size) + 'hsize_',
        str(opt.egreedy_epsilon) + 'eps_',
        opt.difficulty_level + '_',
        opt.local_evolve_type + '_',
        opt.world_evolve_type + '_',
        str(opt.runid) + 'runId',
    ]
    results_filename = ''.join(name_parts)
    opt.results_filename = results_filename

    graph = None
    # One seed per run, drawn from the RNG seeded with --initial_seed above.
    seeds = [random.randint(1, 100) for _ in range(opt.nruns)]
    for n in range(opt.nruns):
        opt.run_no = n
        opt.seed = seeds[n]
        random.seed(opt.seed)
        np.random.seed(opt.seed)
        torch.manual_seed(opt.seed)  # For reproducibility
        if torch.cuda.is_available():
            torch.cuda.manual_seed(opt.seed)

        print("Testing ...")
        tokenizer = Tokenizer(noun_only_tokens=opt.noun_only_tokens, use_stopword=opt.use_stopword, ngram=opt.ngram,
                              extractor=tk_extractor)
        rel_extractor = RelationExtractor(tokenizer, openie_url=opt.corenlp_url)
        myagent = agent.KnowledgeAwareAgent(graph, opt, tokenizer, rel_extractor, device)
        myagent.type = opt.agent_type

        print('Loading Pretrained Model ...', end='')
        # Fix: map the checkpoint onto the active device so weights saved on a
        # GPU can still be loaded on a CPU-only machine.
        # SECURITY: torch.load unpickles the file; only load trusted checkpoints.
        myagent.model.load_state_dict(torch.load(opt.pretrained_model, map_location=device))
        print('DONE')

        myagent.test(opt.batch_size)
        opt.mode = opt.split
        opt.nepisodes = opt.no_eval_episodes  # for testing
        opt.max_step_per_episode = opt.eval_max_step_per_episode
        starttime = time()
        print("\n RUN ", n, "\n")
        test_scores, test_norm_scores, test_moves = play(myagent, opt, random_action=random_action)
        test_scores_runs.append(test_scores)
        test_norm_score_runs.append(test_norm_scores)
        test_moves_runs.append(test_moves)
        print("Tested in {:.2f} secs".format(time() - starttime))