-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathoptimizations.py
491 lines (427 loc) · 20.9 KB
/
optimizations.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
from myutils import get_rec_pid, get_path_type
from metrics import *
def sort_by_LIR(path_full):
return LIR_single(path_full[2])
def sort_by_SEP(path_full):
return SEP_single(path_full[2])
# Soft optimization LIR, for every user topk predicted by the baseline,
# get the predicted paths for every item and change the explanation according to LIR motivations
def soft_optimization_LIR(path_data):
pred_paths = path_data.pred_paths
for uid, topk in path_data.uid_topk.items():
#Retrive topk explanations without changin the selected pids
for pid in topk:
pred_paths[uid][pid].sort(key=lambda x: LIR_single(path_data, x[-1]), reverse=True)
path_data.uid_pid_explanation[uid][pid] = pred_paths[uid][pid][0][-1]
# Soft optimization LIR, for every user topk predicted by the baseline,
# get the predicted paths for every item and change the explanation according to SEP motivations
def soft_optimization_SEP(path_data):
pred_path = path_data.pred_paths
for uid, topk in path_data.uid_topk.items():
#Retrive topk explanations without changin the selected pids
for pid in topk:
pred_path[uid][pid].sort(key=lambda x: SEP_single(path_data, x[-1]), reverse=True)
path_data.uid_pid_explanation[uid][pid] = pred_path[uid][pid][0][-1]
def soft_optimization_ETD(path_data):
pred_path = path_data.pred_paths
for uid, topk in path_data.uid_topk.items():
path_data.uid_pid_explanation[uid] = {}
ptype_seen = set()
for pid in topk:
curr_size = len(path_data.uid_pid_explanation[uid])
current_item_pred_paths = pred_path[uid][pid]
current_item_pred_paths.sort(key=lambda x: x[1]) #Sort for probability
for path in current_item_pred_paths:
ptype = get_path_type(path[-1])
if ptype not in ptype_seen:
path_data.uid_pid_explanation[uid][pid] = path[-1]
ptype_seen.add(ptype)
break
# No different path have been found
if curr_size == len(path_data.uid_pid_explanation[uid]):
path_data.uid_pid_explanation[uid][pid] = current_item_pred_paths[0][-1]
#LIR Alpha optimization
def weighted_opt_LIR(path_data, alpha):
pred_path = path_data.pred_paths
#Pred_paths {uid: {pid: [[path_score, path_prob_path], ..., [path_score, path_prob_path]], ...,
# pidn: [[path_score, path_prob_path], ..., [path_score, path_prob_path]]]}, ..., uidn: {pid: ...}}
for uid, pid_list in pred_path.items():
candidates = []
best_candidates = []
best_candidates_pids = set()
#Create candidate list
for pid, path_list in pid_list.items():
candidates.extend(path_list)
candidates.sort(key=lambda candidate: (candidate[0] * (1-alpha)) + (LIR_single(path_data, candidate[-1]) * alpha), reverse=True)
#Pick the best items
for candidate in candidates:
rec_pid = get_rec_pid(candidate)
if rec_pid in best_candidates_pids: continue
best_candidates.append(candidate)
best_candidates_pids.add(rec_pid)
if len(best_candidates) == 10: break
#if len(best_candidates) < 10:
# print("LSEPS THAN 10!")
#Reorder topk by path_score
best_candidates.sort(key=lambda candidate: candidate[0],
reverse=True)
# Update the topk with the reranked one
path_data.uid_topk[uid] = [get_rec_pid(candidate) for candidate in best_candidates]
path_data.uid_pid_explanation[uid] = {get_rec_pid(candidate): candidate[-1] for candidate in best_candidates}
return path_data
#SEP Alpha optimization
def weighted_opt_SEP(path_data, alpha):
pred_paths = path_data.pred_paths
for uid, pid_list in pred_paths.items():
candidates = []
best_candidates = []
best_candidates_pids = set()
for pid, path_list in pid_list.items():
path_list.sort(key=lambda x: x[1], reverse=True)
candidates.extend(path_list)
candidates.sort(key=lambda x: (x[0] * (1-alpha)) + (SEP_single(path_data, x[-1]) * alpha), reverse=True)
#Pick the best items
for candidate in candidates:
rec_pid = get_rec_pid(candidate)
if rec_pid in best_candidates_pids: continue
best_candidates.append(candidate)
best_candidates_pids.add(rec_pid)
if len(best_candidates) == 10: break
#if len(best_candidates) < 10:
# print("LSEPS THAN 10!")
#Reorder topk by path_score
best_candidates.sort(key=lambda candidate: candidate[0],
reverse=True)
# Update the topk with the reranked one
path_data.uid_topk[uid] = [get_rec_pid(candidate) for candidate in best_candidates]
path_data.uid_pid_explanation[uid] = {get_rec_pid(candidate): candidate[-1] for candidate in best_candidates}
return path_data
'''
#ETD Alpha optimization
def weighted_opt_ETD(path_data, alpha):
pred_path = path_data.pred_paths
for uid, pid_list in pred_path.items():
candidates = []
best_candidates = []
best_candidates_pids = set()
#Populate candidate list
for pid, path_list in pid_list.items():
path_list = pred_path[uid][pid]
candidates.extend(path_list)
# Create a bin for every path type and insert in them every path of that type
bins = {}
for candidate in candidates:
candidate_path_type = get_path_type(candidate[-1])
if get_path_type(candidate[-1]) not in bins:
bins[candidate_path_type] = []
bins[candidate_path_type].append(candidate)
# Sort every path type bin by a mixed score based on explanation time relevance and item relevance
for bin_type, path_list in bins.items():
path_list.sort(key=lambda x: x[0],
reverse=True)
#Search the best for path_score among all the top of every bin, if the paths isn't already seen in the best_candidate give him a alpha bonus
ptype_seen = set()
while len(best_candidates) < 10:
best_type = ""
best_score = -1
for bin_type, path_list in bins.items():
if len(path_list) == 0: continue
candidate = path_list[0]
rec_pid = get_rec_pid(path_list[0][-1])
while rec_pid in best_candidates_pids:
path_list.pop(0)
if len(path_list) == 0: break
candidate = path_list[0]
rec_pid = get_rec_pid(candidate[-1])
if len(path_list) == 0: continue
bonus = alpha if get_path_type(candidate[-1]) not in ptype_seen else 0
score = candidate[0] + bonus
if score > best_score:
best_score = score
best_type = get_path_type(candidate[-1])
if best_type == "":
# print("Less than 10: {}".format(len(best_candidates)))
break
best_candidates.append(bins[best_type][0])
best_candidates_pids.add(get_rec_pid(bins[best_type][0][-1]))
ptype_seen.add(best_type)
bins[best_type].pop(0)
if len(bins[best_type]) == 0:
bins.pop(best_type)
# Rearrange the topk based on the metric
best_candidates.sort(key=lambda x: x[0],
reverse=True)
# Update the topk with the reranked one
path_data.uid_topk[uid] = [get_rec_pid(candidate) for candidate in best_candidates]
path_data.uid_pid_explanation[uid] = {get_rec_pid(candidate): candidate[-1] for candidate in best_candidates}
return path_data
'''
def weighted_opt_ETD(path_data, alpha):
pred_path = path_data.pred_paths
for uid, pid_list in pred_path.items():
candidates = []
candidates_pids = {}
best_candidates = []
best_candidates_pids = set()
#Populate candidate list
for pid, path_list in pid_list.items():
for path in path_list:
if pid not in candidates_pids:
candidates_pids[pid] = {}
path_type = get_path_type(path[-1])
if path_type not in candidates_pids[pid]:
candidates_pids[pid][path_type] = []
candidates_pids[pid][path_type].append(path)
for pid, path_type_paths in candidates_pids.items():
for path_type, paths in path_type_paths.items():
paths.sort(key=lambda x: x[1], #redudant paths should be already sorted by prob
reverse=True)
candidates.append(paths[0])
# Create a bin for every path type and insert in them every path of that type
bins = {}
for candidate in candidates:
candidate_path_type = get_path_type(candidate[-1])
if get_path_type(candidate[-1]) not in bins:
bins[candidate_path_type] = []
bins[candidate_path_type].append(candidate)
# Sort every path type bin by a mixed score based on explanation time relevance and item relevance
for bin_type, path_list in bins.items():
path_list.sort(key=lambda x: x[0],
reverse=True)
#Search the best for path_score among all the top of every bin, if the paths isn't already seen in the best_candidate give him a alpha bonus
ptype_seen = set()
while len(best_candidates) < 10:
best_type = ""
best_score = -1
for bin_type, path_list in bins.items():
if len(path_list) == 0: continue
candidate = path_list[0]
rec_pid = get_rec_pid(path_list[0][-1])
while rec_pid in best_candidates_pids:
path_list.pop(0)
if len(path_list) == 0: break
candidate = path_list[0]
rec_pid = get_rec_pid(candidate[-1])
if len(path_list) == 0: continue
bonus = alpha if get_path_type(candidate[-1]) not in ptype_seen else 0
score = candidate[0] + bonus
if score > best_score:
best_score = score
best_type = get_path_type(candidate[-1])
if best_type == "":
#print("Less than 10: {}".format(len(best_candidates)))
break
best_candidates.append(bins[best_type][0])
best_candidates_pids.add(get_rec_pid(bins[best_type][0][-1]))
ptype_seen.add(best_type)
bins[best_type].pop(0)
if len(bins[best_type]) == 0:
bins.pop(best_type)
# Rearrange the topk based on the metric
best_candidates.sort(key=lambda x: x[0],
reverse=True)
# Update the topk with the reranked one
path_data.uid_topk[uid] = [get_rec_pid(candidate) for candidate in best_candidates]
path_data.uid_pid_explanation[uid] = {get_rec_pid(candidate): candidate[-1] for candidate in best_candidates}
return path_data
#LIR+SEP Optimization
def weighted_opt_LIR_SEP(path_data, alpha):
pred_path = path_data.pred_paths
for uid, pid_list in pred_path.items():
candidates = []
best_candidates = []
best_candidates_pids = set()
# Create candidate list
for pid, path_list in pid_list.items():
candidates.extend(path_list)
# Normalize scores between 0 and 1
scores_list = [candidate[0] for candidate in candidates]
min_score = min(scores_list)
max_score = max(scores_list)
for candidate in candidates:
scale = max_score - min_score if max_score - min_score > 0. else 0.000001
candidate[0] = (candidate[0] - min_score) / scale
candidates.sort(key=lambda x: (x[0] * (1-alpha)) + ((LIR_single(path_data, x[-1]) + SEP_single(path_data, x[-1])) * alpha),
reverse=True)
# Pick the best items
for candidate in candidates:
rec_pid = get_rec_pid(candidate)
if rec_pid in best_candidates_pids: continue
best_candidates.append(candidate)
best_candidates_pids.add(rec_pid)
if len(best_candidates) == 10: break
#if len(best_candidates) < 10:
# print("LSEPS THAN 10!")
# Reorder topk by path_score
best_candidates.sort(key=lambda candidate: candidate[0], reverse=True)
# Update the topk with the reranked one
path_data.uid_topk[uid] = [get_rec_pid(candidate) for candidate in best_candidates]
path_data.uid_pid_explanation[uid] = {get_rec_pid(candidate): candidate[-1] for candidate in best_candidates}
return path_data
def weighted_opt_ETD_LIR(path_data, alpha):
pred_path = path_data.pred_paths
for uid, pid_list in pred_path.items():
candidates = []
best_candidates = []
best_candidates_pids = set()
for pid, path_list in pid_list.items():
path_list = pred_path[uid][pid]
candidates.extend(path_list)
# Create a bin for every path type and insert in them every path of that type
bins = {}
for candidate in candidates:
candidate_path_type = get_path_type(candidate[-1])
if get_path_type(candidate[-1]) not in bins:
bins[candidate_path_type] = []
bins[candidate_path_type].append(candidate)
# Sort every path type bin by a mixed score based on explanation time relevance and item relevance
for bin_type, path_list in bins.items():
path_list.sort(key=lambda x: (x[0] * (1-alpha)) + (LIR_single(path_data, x[-1]) * alpha),
reverse=True)
ptype_seen = set()
while len(best_candidates) < 10:
best_type = ""
best_score = -1
for bin_type, path_list in bins.items():
if len(path_list) == 0: continue
candidate = path_list[0]
rec_pid = get_rec_pid(path_list[0][-1])
while rec_pid in best_candidates_pids:
path_list.pop(0)
if len(path_list) == 0: break
candidate = path_list[0]
rec_pid = get_rec_pid(candidate[-1])
if len(path_list) == 0: continue
bonus = alpha if get_path_type(candidate[-1]) not in ptype_seen else 0
score = candidate[0] + bonus
if score > best_score:
best_score = score
best_type = get_path_type(candidate[-1])
if best_type == "":
# print("Less than 10: {}".format(len(best_candidates)))
break
best_candidates.append(bins[best_type][0])
best_candidates_pids.add(get_rec_pid(bins[best_type][0][-1]))
ptype_seen.add(best_type)
bins[best_type].pop(0)
if len(bins[best_type]) == 0:
bins.pop(best_type)
# Rearrange the topk based on path_score
best_candidates.sort(key=lambda candidate: candidate[0], reverse=True)
# Update the topk with the reranked one
path_data.uid_topk[uid] = [get_rec_pid(candidate) for candidate in best_candidates]
path_data.uid_pid_explanation[uid] = {get_rec_pid(candidate): candidate[-1] for candidate in best_candidates}
return path_data
#ETD+SEP Alpha optimization
def weighted_opt_ETD_SEP(path_data, alpha):
pred_path = path_data.pred_paths
for uid, pid_list in pred_path.items():
candidates = []
best_candidates = []
best_candidates_pids = set()
for pid, path_list in pid_list.items():
path_list = pred_path[uid][pid]
candidates.extend(path_list)
# Create a bin for every path type and insert in them every path of that type
bins = {}
for candidate in candidates:
candidate_path_type = get_path_type(candidate[-1])
if get_path_type(candidate[-1]) not in bins:
bins[candidate_path_type] = []
bins[candidate_path_type].append(candidate)
# Sort every path type bin by a mixed score based on explanation time relevance and item relevance
for bin_type, path_list in bins.items():
path_list.sort(key=lambda x: (x[0] * (1-alpha)) + (SEP_single(path_data, x[-1]) * alpha),
reverse=True)
ptype_seen = set()
while len(best_candidates) < 10:
best_type = ""
best_score = -1
for bin_type, path_list in bins.items():
if len(path_list) == 0: continue
candidate = path_list[0]
rec_pid = get_rec_pid(path_list[0][-1])
while rec_pid in best_candidates_pids:
path_list.pop(0)
if len(path_list) == 0: break
candidate = path_list[0]
rec_pid = get_rec_pid(candidate[-1])
if len(path_list) == 0: continue
bonus = alpha if get_path_type(candidate[-1]) not in ptype_seen else 0
score = candidate[0] + bonus
if score > best_score:
best_score = score
best_type = get_path_type(candidate[-1])
if best_type == "":
# print("Less than 10: {}".format(len(best_candidates)))
break
best_candidates.append(bins[best_type][0])
best_candidates_pids.add(get_rec_pid(bins[best_type][0][-1]))
ptype_seen.add(best_type)
bins[best_type].pop(0)
if len(bins[best_type]) == 0:
bins.pop(best_type)
# Rearrange the topk based on the metric
best_candidates.sort(key=lambda x: x[0],
reverse=True)
# Update the topk with the reranked one
path_data.uid_topk[uid] = [get_rec_pid(candidate) for candidate in best_candidates]
path_data.uid_pid_explanation[uid] = {get_rec_pid(candidate): candidate[-1] for candidate in best_candidates}
return path_data
#ETD+SEP+LIR Alpha optimization
def weighted_opt_ETD_SEP_LIR(path_data, alpha):
pred_path = path_data.pred_paths
for uid, pid_list in pred_path.items():
candidates = []
best_candidates = []
best_candidates_pids = set()
for pid, path_list in pid_list.items():
path_list = pred_path[uid][pid]
candidates.extend(path_list)
# Create a bin for every path type and insert in them every path of that type
bins = {}
for candidate in candidates:
candidate_path_type = get_path_type(candidate[-1])
if get_path_type(candidate[-1]) not in bins:
bins[candidate_path_type] = []
bins[candidate_path_type].append(candidate)
# Sort every path type bin by a mixed score based on explanation time relevance and item relevance
for bin_type, path_list in bins.items():
path_list.sort(
key=lambda x: (x[0] * (1 - alpha)) + ((LIR_single(path_data, x[-1]) + SEP_single(path_data, x[-1])) * alpha),
reverse=True)
ptype_seen = set()
while len(best_candidates) < 10:
best_type = ""
best_score = -1
for bin_type, path_list in bins.items():
if len(path_list) == 0: continue
candidate = path_list[0]
rec_pid = get_rec_pid(path_list[0][-1])
while rec_pid in best_candidates_pids:
path_list.pop(0)
if len(path_list) == 0: break
candidate = path_list[0]
rec_pid = get_rec_pid(candidate[-1])
if len(path_list) == 0: continue
bonus = alpha if get_path_type(candidate[-1]) not in ptype_seen else 0
score = candidate[0] + bonus
if score > best_score:
best_score = score
best_type = get_path_type(candidate[-1])
if best_type == "":
# print("Less than 10: {}".format(len(best_candidates)))
break
best_candidates.append(bins[best_type][0])
best_candidates_pids.add(get_rec_pid(bins[best_type][0][-1]))
ptype_seen.add(best_type)
bins[best_type].pop(0)
if len(bins[best_type]) == 0:
bins.pop(best_type)
# Rearrange the topk based on the metric
best_candidates.sort(key=lambda x: x[0],
reverse=True)
# Update the topk with the reranked one
path_data.uid_topk[uid] = [get_rec_pid(candidate) for candidate in best_candidates]
path_data.uid_pid_explanation[uid] = {get_rec_pid(candidate): candidate[-1] for candidate in best_candidates}
return path_data