-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathcar_parse_benchmark.py
88 lines (67 loc) · 2.11 KB
/
car_parse_benchmark.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import io
import sys
import time
from cbrrr import decode_dag_cbor, encode_dag_cbor, CID
# Forwarded as the ``atjson_mode`` keyword argument to both decode_dag_cbor()
# and encode_dag_cbor() for every block handled in parse_car().
ATJSON_MODE = True
# Raise the interpreter's recursion limit far above its default.
# NOTE(review): presumably so that deeply nested blocks cannot trip
# RecursionError while benchmarking -- confirm this is still required, since
# a limit this high removes the guard against overflowing the native stack.
sys.setrecursionlimit(99999999)
def parse_varint(stream):
    """Read one unsigned LEB128 varint from *stream* and return it as an int.

    Bytes are consumed one at a time: the low seven bits of each byte are
    appended to the result (least-significant group first), and a clear high
    bit marks the final byte. The stream is left positioned immediately after
    the varint.

    NOTE: this decoder has not been strictly tested; in particular it does
    not reject over-long or non-minimal encodings.

    Args:
        stream: a binary file-like object whose ``read(1)`` returns bytes.

    Returns:
        The decoded non-negative integer.

    Raises:
        EOFError: if the stream ends before the terminating byte is read.
    """
    n = 0
    shift = 0
    while True:
        chunk = stream.read(1)
        if not chunk:
            # An empty read means the input was truncated mid-varint (or was
            # already exhausted); report that instead of an opaque IndexError.
            raise EOFError("unexpected end of stream while reading varint")
        val = chunk[0]
        n |= (val & 0x7F) << shift
        if not val & 0x80:
            return n
        shift += 7
# Cumulative seconds spent inside encode_dag_cbor() and decode_dag_cbor().
# parse_car() adds to these through ``global`` statements and the ``__main__``
# block divides by them, so they start out as floats.
enctime = 0.0
dectime = 0.0
def parse_car(stream, length):
    """Parse a CAR stream, round-tripping every block through the codec.

    The stream must start with a varint-prefixed DAG-CBOR header whose
    ``version`` is 1 and which lists exactly one root. Each following entry
    is a varint length, a CID and the block payload. Every payload is decoded
    and re-encoded, and the re-encoded bytes must equal the original payload.

    Side effects: the time spent decoding and encoding is added to the
    module-level ``dectime`` and ``enctime`` accumulators (in seconds).

    Args:
        stream: a seekable binary stream positioned at the start of the CAR.
        length: the stream offset at which parsing stops, i.e. the total
            number of bytes in the CAR.

    Returns:
        A ``(root, nodes)`` tuple: ``root`` is the single entry of the
        header's ``roots`` list and ``nodes`` maps each block's CID to its
        decoded value.

    Raises:
        AssertionError: if the header, a CID, a block length or a round-trip
            comparison fails validation.
    """
    global enctime
    global dectime

    # XXX: CIDs need to be parsed properly; their length might not be 36.
    cid_len = 36

    header_len = parse_varint(stream)
    header_bytes = stream.read(header_len)
    assert len(header_bytes) == header_len
    car_header = decode_dag_cbor(header_bytes)
    assert car_header.get("version") == 1
    assert len(car_header.get("roots", [])) == 1
    root = car_header["roots"][0]

    nodes = {}
    while stream.tell() != length:
        # block_len counts the CID bytes as well as the payload.
        block_len = parse_varint(stream)
        cid = CID(stream.read(cid_len))
        # This is enough to validate atproto-flavoured CIDs.
        assert cid.is_cidv1_dag_cbor_sha256_32()
        block_data = stream.read(block_len - cid_len)
        assert len(block_data) == block_len - cid_len
        # The payload hash is not checked against the CID; kept for reference:
        # content_hash = hashlib.sha256(block_data).digest()
        # assert(cid_raw.endswith(content_hash))

        # perf_counter() is monotonic and high-resolution, so summing many
        # short per-block intervals is not skewed by wall-clock adjustments
        # or a coarse system clock.
        start = time.perf_counter()
        block = decode_dag_cbor(block_data, atjson_mode=ATJSON_MODE)
        # block = libipld.decode_dag_cbor(block_data)
        dectime += time.perf_counter() - start

        start = time.perf_counter()
        roundtrip = encode_dag_cbor(block, atjson_mode=ATJSON_MODE)
        enctime += time.perf_counter() - start

        # Re-encoding must reproduce the original bytes exactly.
        assert block_data == roundtrip
        nodes[cid] = block
    return root, nodes
if __name__ == "__main__":
    # Benchmark entry point: read the CAR file named by the first command-line
    # argument, parse it, and report decode/encode throughput.
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} <path-to-car-file>")

    # Read the whole file up front so that disk I/O is not part of the timings;
    # the context manager closes the handle promptly.
    with open(sys.argv[1], "rb") as car_file:
        car = car_file.read()

    root, nodes = parse_car(io.BytesIO(car), len(car))

    size_mib = len(car) / (1024 * 1024)
    # A CAR with no blocks leaves the accumulators at zero; report an infinite
    # rate rather than failing with ZeroDivisionError.
    dec_speed = size_mib / dectime if dectime else float("inf")
    print(f"Parsed {len(car)} bytes at {dec_speed:.2f}MB/s")
    enc_speed = size_mib / enctime if enctime else float("inf")
    print(f"Encoded {len(car)} bytes at {enc_speed:.2f}MB/s")

    # Optional comparison against libipld, left disabled:
    # start = time.time()
    # libipld.decode_car(car)
    # duration = time.time()-start
    # car_speed = (len(car)/(1024*1024))/duration
    # print(f"libipld.decode_car {len(car)} bytes at {car_speed:.2f}MB/s")
    # print(nodes[root])