-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathLZ78Decoder.py
84 lines (63 loc) · 2.39 KB
/
LZ78Decoder.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
from base64 import decode
from symtable import Symbol
import time
from LZ78Coder import LZ78_file
# Input file lists. Only `bigfilenames` is iterated by the driver loop in this
# file; `filenames` and `shortfilenames` are not referenced in the visible code.
# NOTE(review): the "cantrbry/" and "large/" prefixes look like Canterbury
# corpus directories resolved relative to the working directory - confirm.
filenames = ["cantrbry/kennedy.xls"] #, "cantrbry/fields.c", "cantrbry/sum"]
shortfilenames = ["cantrbry/alice29.txt", "cantrbry/asyoulik.txt", "cantrbry/cp.html", "cantrbry/fields.c", "cantrbry/grammar.lsp", "cantrbry/kennedy.xls", "cantrbry/lcet10.txt",
"cantrbry/plrabn12.txt", "cantrbry/ptt5", "cantrbry/sum", "cantrbry/xargs.1"]
bigfilenames = ["large/bible.txt", "large/E.coli", "large/world192.txt"]
# Width parameter for code indices: LZ78_decoder treats an entry whose index is
# 2**BITS-1 and whose symbol is None as the "clear and start over" marker.
BITS = 10
def read_file(filename):
    """Return the complete contents of ``filename`` as a ``bytes`` object."""
    # Binary mode: the data is handed back exactly as stored, undecoded.
    with open(filename, "rb") as handle:
        return handle.read()
def LZ78_decoder(filename, code, bits=None):
    """Decode a sequence of LZ78 ``(index, symbol)`` entries into symbols.

    Each entry is read as ``entry[0]`` (an index) and ``entry[1]`` (a symbol
    or ``None``):

    * index ``0`` (or the clear index with a symbol): a literal; the symbol
      is emitted if it is not ``None``.
    * the clear index ``2**bits - 1`` with a ``None`` symbol: the dictionary
      is restarted; indices in later entries are counted from the entry that
      follows the clear marker.
    * any other index: a phrase; the chain of indices is followed back until
      an entry with index ``0`` is reached, and the collected symbols are
      emitted oldest-first, followed by the entry's own symbol.

    Args:
        filename: Only used in the progress message printed on every clear
            marker.
        code: Indexable sequence of ``(index, symbol)`` entries. It is not
            modified.
        bits: Width used to compute the clear index. Defaults to the
            module-level ``BITS`` when omitted.

    Returns:
        A list with the decoded symbols, in order.

    Raises:
        IndexError: If an entry refers to a position past the end of ``code``.
    """
    if bits is None:
        bits = BITS
    clear_index = 2**bits - 1

    decoded = []
    # Position in `code` that counts as entry 0 of the current dictionary.
    # Moving this offset replaces re-slicing `code` on every clear marker,
    # which copied the whole remaining list each time.
    base = 0
    pos = 0
    total = len(code)
    while pos < total:
        entry = code[pos]
        index, symbol = entry[0], entry[1]
        pos += 1

        if index == clear_index and symbol is None:
            # Clear marker: the next entry starts a fresh dictionary.
            base = pos
            print(f'dec {filename} @ {len(decoded)}')
            continue

        if index == 0 or index == clear_index:
            # Literal entry; a None symbol carries no output.
            if symbol is not None:
                decoded.append(symbol)
            continue

        # Phrase entry: gather symbols newest-first, then reverse once
        # instead of inserting at the front of the list repeatedly.
        # A None symbol (phrase without a trailing symbol) is not emitted,
        # matching how literal entries treat None.
        phrase = [] if symbol is None else [symbol]
        link = code[base + index]
        while link[0] != 0:
            phrase.append(link[1])
            link = code[base + link[0]]
        phrase.append(link[1])
        phrase.reverse()
        decoded.extend(phrase)
    return decoded
# Driver: compress each file in `bigfilenames` with LZ78_file, decode the result
# with LZ78_decoder, and report wall-clock timings plus a preview of the output.
# NOTE(review): the extent of the loop body is not evident here - confirm
# whether the preview and timing statements below the first `#%%` marker run
# once per file or only once after the loop.
for filename in bigfilenames:
# Coding
start = time.time()
# Seed list handed to LZ78_file; presumably the initial (index 0, no symbol)
# entry of the code list - confirm in LZ78Coder.
# NOTE(review): the name `list` shadows the builtin.
list = [(0, None)]
code = LZ78_file(filename, list)
end = time.time()
print("Coding ", filename, " took ", end-start, " seconds!\n")
# Decoding
tic = time.time()
decoded = LZ78_decoder(filename, code)
tec = time.time()
#%%
nr_symbols = 500
print(f'\nFirst {nr_symbols} symbols is:\n\n')
# NOTE(review): indexes decoded[0] .. decoded[nr_symbols-1] unconditionally, so
# this raises IndexError when fewer than nr_symbols symbols were decoded.
for i in range(0, nr_symbols):
print(chr(decoded[i]), end="")
#%%
# Disabled element-by-element comparison against the original data; `orig` is
# not defined anywhere in the visible code.
#for i in range(len(orig)):
# if(orig[i] != decoded[i]):
# print(decoded[i])
print("\n\nDecoding took ", tec-tic, " seconds")