import random
import math
import numpy as np
import time
import matplotlib.pyplot as plt
import collections
from operator import add

# An algorithm takes as input a sequence of requests and the cache size k.
# It returns its full history of execution, i.e. a list of length 1+len(requests),
# whose i-th element is a tuple of length k representing the cache at the
# moment before the i-th request arrives.
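#
# For example (illustrative, with k = 2): LRU([1, 2, 1], 2) returns
# [(None, None), (1, None), (1, 2), (1, 2)], whose Cost(...) is 2 (two misses).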

# #################### Predictions start here ####################

# Each prediction returns a list of len(requests) numbers; the t-th entry
# predicts the next time the element requested at time t appears again.

# Synthetic predictions.
# Computes the next occurrence of each element, then adds random noise
# following a lognormal distribution.
# Note: the noise adds a positive offset, so one should only use the relative
# order of such predictions, not their exact values.
def PredRandom(requests, sigma):
    infinity = len(requests)
    last_time = dict()
    next_occ = [infinity * 10000] * len(requests)
    for t, request in enumerate(requests):
        if request in last_time:
            next_occ[last_time[request]] = t
        last_time[request] = t
    noise = np.random.lognormal(0, sigma, len(requests))
    return noise + next_occ
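
# Quick sanity check (illustrative): with sigma = 0 the lognormal noise is
# identically 1, so the values are the exact next occurrences shifted by one, e.g.
#   PredRandom([1, 2, 1, 3], 0)  ->  array([3., 40001., 40001., 40001.])
# (here 40000 = len(requests) * 10000 stands for "never requested again").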

# emulates LRU predictions
def PredLRU(requests):
    infinity = len(requests)
    pred = []
    for t, request in enumerate(requests):
        pred.append(-t)
    return pred

# Prediction PLECO, tailored for the BK dataset (DOI: 10.1145/2566486.2568018)
# Note: expensive computation, quadratic in len(requests)
def PredPLECO_BK(requests):
    infinity = len(requests)
    pred = []  # the predictions we give (1 / probability(being the next element))
    prev_occs = {}  # list of previous occurrences of each element
    weights = []  # weights[i] = weight of the element requested i steps earlier (weights[0] -> current element)
    sum_weights = 0
    for t, request in enumerate(requests, 1):  # t starts at 1
        weights.append((t + 10) ** (-1.8) * math.exp(-t / 670))  # weights starts at t=1
        sum_weights += weights[-1]
        if request not in prev_occs:
            prev_occs[request] = []
        prev_occs[request].append(t)
        prob = sum(weights[t - i] for i in prev_occs[request]) / sum_weights  # probability that request is the next occurrence according to PLECO: t-i in [0;t-1]
        pred.append(1 / prob + t - 1)  # predicted next occurrence
    return pred

# Prediction POPU defined in the main paper
def PredPopularity(requests):
    infinity = len(requests)
    pred = []
    count = {}
    for t, request in enumerate(requests, 1):
        if request not in count:
            count[request] = 0
        count[request] += 1
        pred.append(t + t / count[request])
    return pred
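
# Quick sanity check (illustrative): more frequently requested elements get
# nearer predicted next occurrences, e.g.
#   PredPopularity([1, 1, 2])  ->  [2.0, 3.0, 6.0]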
# #################### Predictions end here ####################
# #################### Algorithms start here ####################

# Computes the eviction sequence that trusts the predictions (Belady's rule
# applied to the predicted next occurrences).
# error_probability adds a chance of performing a random eviction instead.
def FollowPred(requests, k, pred, error_probability=0.0):
    cache = [None] * k
    next_pred = [math.inf] * k
    history = [tuple(cache)]
    for t, (request, next) in enumerate(zip(requests, pred)):
        if request in cache:
            index_to_evict = cache.index(request)
        elif None in cache:
            index_to_evict = cache.index(None)
        else:
            index_to_evict = next_pred.index(max(next_pred))
            if random.random() < error_probability:
                index_to_evict = random.randint(0, k - 1)
        cache[index_to_evict] = request
        next_pred[cache.index(request)] = next
        history.append(tuple(cache))
    return history

# compute the optimal solution (Belady), reusing the previous functions
def OPT(requests, k, pred=[]):
    return FollowPred(requests, k, PredRandom(requests, 0))
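
# Illustrative check: OPT follows Belady's farthest-in-future rule, so e.g.
#   Cost(OPT([1, 2, 3, 1], 2)) == 3
# (three compulsory misses, then a hit on page 1).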

# Marking algorithm that evicts the furthest predicted unmarked element
def FollowPredMarking(requests, k, pred, error_probability=0.0):
    cache = [None] * k
    cache_preds = [-1] * k
    unmarked = list(range(k))
    history = [tuple(cache)]
    phase_elements = []
    for t, request in enumerate(requests):
        if request not in phase_elements:
            phase_elements.append(request)
        if request in cache:
            index = cache.index(request)
        elif None in cache:
            index = cache.index(None)
        else:
            if len(phase_elements) == k + 1:
                assert not unmarked, unmarked
                unmarked = list(range(k))
                phase_elements = [request]
            assert unmarked, len(phase_elements)
            index = max((cache_preds[i], i) for i in unmarked)[1]
            if random.random() < error_probability:
                index = random.choice(unmarked)
        cache[index] = request
        if index in unmarked:
            unmarked.remove(index)
        cache_preds[index] = pred[t]
        history.append(tuple(cache))
    return history

# optimal marking algorithm
def OPTMarking(requests, k):
    return FollowPredMarking(requests, k, PredRandom(requests, 0))

# evicts a random element
def Rand(requests, k, pred=[]):
    cache = [None] * k
    history = [tuple(cache)]
    for t, request in enumerate(requests):
        if request not in cache:
            cache[random.randrange(0, len(cache))] = request
        history.append(tuple(cache))
    return history

# evicts the least recently used element
def LRU(requests, k, pred=[]):
    cache = [None] * k
    time_used = [-1] * k
    history = [tuple(cache)]
    for t, request in enumerate(requests):
        if request not in cache:
            cache[time_used.index(min(time_used))] = request
        time_used[cache.index(request)] = t
        history.append(tuple(cache))
    return history

# O(log k)-competitive randomized marking algorithm
def Marker(requests, k, pred=[]):
    cache = [None] * k
    unmarked = []
    history = [tuple(cache)]
    for t, request in enumerate(requests):
        if request not in cache:
            if not unmarked:
                unmarked = list(range(k))
            cache[random.choice(unmarked)] = request
        index = cache.index(request)
        if index in unmarked:
            unmarked.remove(index)
        history.append(tuple(cache))
    return history
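
# Illustrative worst case for deterministic policies: on the cyclic sequence
#   reqs = list(range(6)) * 100     # 6 pages, k = 5
# LRU faults on every request (Cost(LRU(reqs, 5)) == 600), while Marker's
# randomization makes its expected cost considerably smaller.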

# Algorithm from Lykouris & Vassilvitskii (https://dblp.org/rec/conf/icml/LykourisV18.html)
# named L&V in the paper
PRED_MARKER_GAMMA = 1.

def PredMarker(requests, k, pred):
    Hk = 1.
    for i in range(2, k + 1):
        Hk += 1 / i
    cache = [None] * k
    unmarked = list(range(k))
    cache_preds = [-1] * k
    clean_c = 0
    stale = {}
    chain_lengths = []
    chain_reps = []
    history = [tuple(cache)]
    for t, request in enumerate(requests):
        if request in cache:
            index_to_evict = cache.index(request)
        elif None in cache:
            index_to_evict = cache.index(None)
        else:
            if not unmarked:
                clean_c = 0
                stale = set(cache)
                unmarked = list(range(k))
                chain_lengths = []
                chain_reps = []
            if request not in stale:
                clean_c += 1
                index_to_evict = max((cache_preds[i], i) for i in unmarked)[1]
                chain_lengths.append(1)
                chain_reps.append(cache[index_to_evict])
            else:
                assert request in chain_reps
                c = chain_reps.index(request)
                chain_lengths[c] += 1
                if chain_lengths[c] <= Hk * PRED_MARKER_GAMMA:
                    index_to_evict = max((cache_preds[i], i) for i in unmarked)[1]
                else:
                    index_to_evict = random.choice(unmarked)
                chain_reps[c] = cache[index_to_evict]
        cache[index_to_evict] = request
        cache_preds[index_to_evict] = pred[t]
        if index_to_evict in unmarked:
            unmarked.remove(index_to_evict)
        history.append(tuple(cache))
    return history

# Algorithm from Rohatgi (https://doi.org/10.1137/1.9781611975994.112)
# weaker competitive ratio than LNonMarker
def LMarker(requests, k, pred):
    cache = [None] * k
    unmarked = []
    cache_preds = [-1] * k
    stale = {}
    history = [tuple(cache)]
    for t, request in enumerate(requests):
        if request in cache:
            index_to_evict = cache.index(request)
        elif None in cache:
            index_to_evict = cache.index(None)
        else:
            if not unmarked:
                stale = set(cache)
                unmarked = list(range(k))
            if request in stale:
                index_to_evict = random.choice(unmarked)
            else:
                index_to_evict = max((cache_preds[i], i) for i in unmarked)[1]
        cache[index_to_evict] = request
        cache_preds[index_to_evict] = pred[t]
        if index_to_evict in unmarked:
            unmarked.remove(index_to_evict)
        history.append(tuple(cache))
    return history

# Algorithm from Rohatgi (https://doi.org/10.1137/1.9781611975994.112)
# Better competitive ratio than LMarker, but not robust: it needs to be
# combined with Marker
def LNonMarker_nonrobust(requests, k, pred):
    cache = [None] * k
    unmarked = []
    cache_preds = [-1] * k
    stale = {}
    history = [tuple(cache)]
    evictions = {}
    phase_elements = []
    for t, request in enumerate(requests):
        if request not in phase_elements:
            phase_elements.append(request)
            if len(phase_elements) == k + 1:
                stale = set(cache)
                unmarked = list(range(k))
                evictions = {}
                phase_elements = [request]
        if request in cache:
            index_to_evict = cache.index(request)
        elif None in cache:
            index_to_evict = cache.index(None)
        else:
            assert unmarked
            if request in stale:
                if evictions[request] not in stale:
                    index_to_evict = random.choice(range(k))
                else:
                    index_to_evict = random.choice(unmarked)
            else:
                index_to_evict = max((cache_preds[i], i) for i in unmarked)[1]
            evictions[cache[index_to_evict]] = request
        cache[index_to_evict] = request
        cache_preds[index_to_evict] = pred[t]
        if index_to_evict in unmarked:
            unmarked.remove(index_to_evict)
        history.append(tuple(cache))
    return history

# Implementation of the new algorithm Trust&Doubt.
# Here pred must be the output (i.e., the history) of a lazy algorithm
# (an algorithm evicting a single element per request).
# This implementation is not lazy; it is meant to be used as a subroutine in
# the function LazyTrustDoubt.

# different eviction policies
TrustDoubtLRUevict = True
TrustDoubtAncientEvictPredLRU = True

def TrustDoubt(requests, k, pred):
    cache = [None] * k
    priorities = list(range(k))  # priorities[i] is the priority of stale[i]
    history = [tuple(cache)]
    new_elements = []  # elements arrived in this phase
    last_seen = dict()
    for t, request in enumerate(requests):
        last_seen[request] = t
        if request not in new_elements:
            new_elements.append(request)
        # start of a new phase
        if len(new_elements) > k or t == 0:
            ancient = set(cache) - set(new_elements)  # cache elements that were not requested during the previous phase
            random.shuffle(priorities)  # k priorities even if stale can be smaller; this does not matter
            clean_evict = dict()  # clean_evict[q] = p_q in the paper, so clean_evict.values() = T | D, where T = {p_q : trusted[q]} and D = {p_q : not trusted[q]}
            trusted = dict()
            clean_pages = []  # C in the paper
            stale = list(set(cache) - set(ancient))
            unmarked_stale = list(stale)
            arrival = dict()  # first request time of each element in the current phase
            interval_arrivals = dict()  # number of arrivals at the start of the current interval
            interval_length = dict()  # length of the current interval
            new_elements = [request]  # elements arrived in this phase
        if request not in arrival:
            interval_arrivals[request] = len(arrival)
            arrival[request] = t
        if request in unmarked_stale:
            unmarked_stale.remove(request)
        # in the first part of a phase, we evict ancient elements
        if ancient:
            if request in ancient:
                ancient.remove(request)
            if request not in cache:
                if None in cache:
                    cache[cache.index(None)] = request
                else:
                    # evict some ancient page; several policies are possible
                    if TrustDoubtAncientEvictPredLRU:
                        candidates = ancient - set(pred[t + 1])
                        if not candidates:
                            candidates = ancient
                    else:
                        candidates = ancient
                    evict = min((last_seen[x] if x in last_seen else -1, x) for x in candidates)[1]  # LRU rule
                    assert evict in cache
                    ancient.remove(evict)
                    cache[cache.index(evict)] = request
        else:
            # ancient is now empty
            if request not in stale and arrival[request] == t:  # clean pages are only tracked once ancient is empty: the non-stale elements arriving from then on
                clean_pages.append(request)
            # step 1: evict the unmarked stale page of lowest priority
            if request not in cache:
                assert set(unmarked_stale) & set(cache)
                index_to_evict = min((priorities[stale.index(p)], cache.index(p)) for p in unmarked_stale if p in cache)[1]
                cache[index_to_evict] = request
            # step 2: initialize p_q and trusted
            if request in clean_pages and arrival[request] == t:
                # choose a page that is unmarked stale or arrived in this phase, is not in the predictor's cache, and is neither in T nor in D
                candidates = list(((set(unmarked_stale) | set(new_elements)) - set(pred[t + 1])) - set(clean_evict.values()))
                if not candidates:
                    assert None in cache or None in history[t - 1]
                    candidates = [None]
                if TrustDoubtLRUevict:
                    clean_evict[request] = min((last_seen[x] if x in last_seen else -1, x) for x in candidates)[1]
                else:
                    clean_evict[request] = random.choice(candidates)
                trusted[request] = True
                interval_length[request] = 1
            # step 3: if the current request is the page p_q evicted on behalf of some clean page q, choose a new page for q to evict, without trusting the predictor
            for q, pq in clean_evict.items():
                if request == pq:
                    candidates = list(((set(unmarked_stale) | set(new_elements)) - set(pred[t + 1])) - set(clean_evict.values()))
                    assert candidates
                    if TrustDoubtLRUevict:
                        clean_evict[q] = min((last_seen[x] if x in last_seen else -1, x) for x in candidates)[1]
                    else:
                        clean_evict[q] = random.choice(candidates)
                    trusted[q] = False
                    interval_arrivals[q] = len(arrival)
                    break
            # step 4: regularly (i.e., after some number of arrivals), evict p_q and put back in the cache an unmarked page
            if arrival[request] == t:
                for q in clean_pages:
                    if len(arrival) - interval_arrivals[q] == interval_length[q]:
                        interval_arrivals[q] = len(arrival)
                        if trusted[q] == False:
                            interval_length[q] *= 2
                        trusted[q] = True
                        assert q in clean_evict, q
                        if clean_evict[q] in cache and clean_evict[q] != None:
                            index = cache.index(clean_evict[q])
                            cache[index] = None
                            candidates = set(unmarked_stale) - set(cache)  # unmarked stale pages not in cache
                            T = set([clean_evict[x] for x in trusted if trusted[x]])
                            candidates = candidates - T  # keep only pages not in T
                            assert candidates
                            page = max((priorities[i], p) for i, p in enumerate(stale) if p in candidates)[1]  # take the highest priority
                            cache[index] = page
        assert request in cache
        history.append(tuple(cache))
    return history

# Lazy-fication of the above algorithm. This is the algorithm that we consider.
def LazyTrustDoubt(requests, k, pred):
    goal = TrustDoubt(requests, k, pred)
    cache = [None] * k
    history = [tuple(cache)]
    last_used = [-1] * k
    for t, request in enumerate(requests):
        cache = Lazy_update(cache, goal[t + 1], request, last_used, t)
        history.append(tuple(cache))
    return history
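
# Illustrative usage: pred must be the history of a lazy algorithm, e.g.
#   history = LazyTrustDoubt(requests, k, LRU(requests, k))
# uses LRU's cache contents as the (untrusted) prediction.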
# #################### Algorithms end here ####################
# #################### Combining schemes start here ####################

# cost to move from cache1 to cache2
def Cache_cost(cache1, cache2):
    return len((set(cache2) - {None}) - (set(cache1) - {None}))

# Procedure used to obtain a lazy algorithm (1 eviction per request) from a
# non-lazy one. We currently hold cache1 and serve the request while using
# cache2 as advice: we evict a single element of cache1 that is not in cache2,
# choosing the LRU element according to the parameter last_used (the last time
# each element of cache1 was used).
def Lazy_update(cache1, cache2, request, last_used, t):
    cache = list(cache1)
    if request in cache:
        return cache
    if None in cache:
        index_to_evict = cache.index(None)
    else:
        candidates = list(set(cache) - set(cache2))
        assert len(candidates) > 0
        if last_used:
            assert len(last_used) == len(cache1)
            index_to_evict = min((last_used[cache1.index(i)], cache1.index(i)) for i in candidates)[1]
            last_used[index_to_evict] = t
        else:
            index_to_evict = cache.index(random.choice(candidates))
    cache[index_to_evict] = request
    return cache
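
# Illustrative: serving request 3 while steering [1, 2] towards the target
# cache [3, 2] evicts the LRU candidate outside the target:
#   Lazy_update([1, 2], [3, 2], 3, [0, 1], 2)  ->  [3, 2]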

# Randomly combine 2 algorithms (multiplicative weights)
# algs: list of algorithms to combine
# parameterized by epsilon
def Combine_rand(requests, k, pred, algs, epsilon=0.5, LAZY=True):
    beta = 1.0 - 0.5 * epsilon
    m = len(algs)
    assert m == 2
    histories = list(map(lambda x: x(requests, k, pred), algs))
    weights = [1] * len(algs)
    probs = [1 / len(algs)] * len(algs)
    history = [tuple([None] * k)]
    last_used = [-1] * k
    cur_alg = random.randrange(0, len(algs))
    for t, request in enumerate(requests):
        loss = list(map(lambda x: Cache_cost(x[t], x[t + 1]), histories))
        new_weights = [w * beta ** l for w, l in zip(weights, loss)]
        total_weights = sum(new_weights)
        new_probs = [w / total_weights for w in new_weights]
        if new_probs[cur_alg] < probs[cur_alg]:
            cur_alg = cur_alg if random.random() > (probs[cur_alg] - new_probs[cur_alg]) / probs[cur_alg] else 1 - cur_alg
        probs = new_probs
        weights = new_weights
        weights = probs  # prevents floating point errors: always normalize the weights
        if LAZY:  # evict a single element which is in our cache but not in the target cache
            history.append(tuple(Lazy_update(history[t], histories[cur_alg][t + 1], request, last_used, t)))
        else:
            history.append(histories[cur_alg][t + 1])
    return history

# Deterministically combine 2 algorithms
# algs: list of algorithms to combine
# parameterized by gamma < 1
def Combine_det(requests, k, pred, algs, gamma=0.01, LAZY=True):
    m = len(algs)
    cur_alg = 0
    costs = [0] * m
    bound = 1.
    histories = list(map(lambda x: x(requests, k, pred), algs))
    history = [tuple([None] * k)]
    last_used = [-1] * k
    for t, request in enumerate(requests):
        costs = list(map(add, costs, map(lambda x: Cache_cost(x[t], x[t + 1]), histories)))  # add the new cost of each algorithm
        while costs[cur_alg] > bound:
            cur_alg = (cur_alg + 1) % m
            bound *= 1. + gamma / (m - 1)
        if LAZY:
            history.append(tuple(Lazy_update(history[t], histories[cur_alg][t + 1], request, last_used, t)))
        else:
            history.append(histories[cur_alg][t + 1])
    return history

### predefined combining schemes
def Combrand_lambda(algs, epsilon=0.5):
    algorithm = lambda requests, k, pred=[]: Combine_rand(requests, k, pred, algs, epsilon)
    algorithm.__name__ = 'Rnd(' + ','.join(alg.__name__ for alg in algs) + ')' + ('[eps=%f]' % epsilon)
    return algorithm

def Combdet_lambda(algs, gamma=0.01):
    algorithm = lambda requests, k, pred=[]: Combine_det(requests, k, pred, algs, gamma)
    algorithm.__name__ = 'Det(' + ','.join(alg.__name__ for alg in algs) + ')' + ('[gamma=1+%f]' % gamma)
    return algorithm

# Rohatgi's best algorithm
def LNonMarker(requests, k, pred):
    return Combine_det(requests, k, pred, (LNonMarker_nonrobust, Marker))
# #################### Combining schemes end here ####################
# #################### Utilities start here ####################
def VerifyOutput(requests, k, history):
    assert len(history) == len(requests) + 1
    for cache in history:
        assert len(cache) == k
    for element in history[0]:
        assert element is None
    for request, cache in zip(requests, history[1:]):
        assert request in cache

def Cost(history):
    cost = 0
    for cache_prev, cache_next in zip(history[:-1], history[1:]):
        cost += Cache_cost(cache_prev, cache_next)
    return cost
# #################### Utilities end here ####################
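
# Minimal smoke test (a sketch, not part of the original code): run a few of
# the algorithms above on a random sequence and check their histories with
# VerifyOutput. The seed, sequence length, universe size, k, and sigma are
# arbitrary illustrative choices.
if __name__ == '__main__':
    random.seed(0)
    np.random.seed(0)
    k = 5
    requests = [random.randrange(20) for _ in range(200)]
    pred = PredRandom(requests, 1.0)
    for alg in (Rand, LRU, Marker, OPT):
        history = alg(requests, k)
        VerifyOutput(requests, k, history)
        print('%s: cost %d' % (alg.__name__, Cost(history)))
    for alg in (FollowPred, FollowPredMarking, PredMarker, LMarker, LNonMarker):
        history = alg(requests, k, pred)
        VerifyOutput(requests, k, history)
        print('%s: cost %d' % (alg.__name__, Cost(history)))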