-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathbenchmark.py
135 lines (97 loc) · 3.38 KB
/
benchmark.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
# Run: "mypyc rsv"
# Then run benchmark
# Remove all binaries to revert back.
# Don't check binaries into git.
from io import BytesIO, StringIO
import timeit
import json
from rsv import rsv
import csv
from faker import Faker
import random
# Seed dataset for the rsv/csv/json round-trip benchmarks; starts with a
# single "2D" header row — "Pts"/"Tris" rows are appended further below.
other_data = [["2D"]]
def generate_pts_row():
    """Build one "Pts" row: the tag followed by eight random "1"/"-1" strings."""
    row = ["Pts"]
    for _ in range(8):
        row.append(str(random.choice([1, -1])))
    return row
def generate_tris_row():
    """Build one "Tris" row: six triangle-corner indices as strings.

    Draws a random permutation (a, b, c, d) of 0..3 and emits the corners
    a, b, c, c, d, a — the repeats of c and a are deliberate, matching the
    original's shared-vertex layout.
    """
    a, b, c, d = random.sample(range(0, 4), 4)
    return ["Tris", str(a), str(b), str(c), str(c), str(d), str(a)]
# Populate other_data with alternating "Pts"/"Tris" rows: 500 iterations
# at 2 rows each -> 1,000 generated rows plus the "2D" header.
# Fixed: the original message claimed 75,000 rows.
print("Generate 1,000 rows")
for _ in range(500):
    other_data.append(generate_pts_row())
    other_data.append(generate_tris_row())
# Initialize Faker for synthetic people records.
faker = Faker()
# Fixed: the original message claimed 100,000 rows; the loop below
# generates 500 data rows plus one header row.
print("Generate 500 rows of data")
people_data = [["FirstName", "LastName", "Age", "PlaceOfBirth"]]
for _ in range(500):
    first_name = faker.first_name()
    last_name = faker.last_name()
    # ~10% of people get no age, exercising None handling in the serializers.
    age = str(random.randint(1, 100)) if random.random() > 0.1 else None
    place_of_birth = faker.city()
    people_data.append([first_name, last_name, age, place_of_birth])
# Small fixed datasets exercising serializer edge cases.
# sample_data: a jagged row (5 cells vs a 4-column header) with an empty
# string and trailing Nones — presumably an intentional null/empty-cell
# test; confirm against rsv's jagged-row semantics.
sample_data = [
    ['Name', 'ID', 'Description', 'Data'],
    ['Romanin', '0', '', None, None]
]
# more_data: a regular rectangular table containing one None cell.
more_data = [
    ["FirstName", "LastName", "Age", "PlaceOfBirth"],
    ["William", "Smith", "30", "Boston"],
    ["Olivia", "Jones", "27", "San Francisco"],
    ["Lucas", "Brown", None, "Chicago"]
]
# emoji_data: non-ASCII content to exercise Unicode encoding paths.
emoji_data = [
    ["Hello", "🌎"]
]
def csv_workload():
    """Serialize and re-parse every dataset with the csv module, twice.

    Performs two identical write+read round trips per dataset so the
    amount of work matches the rsv and json workloads.
    """
    datasets = (people_data, sample_data, other_data, more_data, emoji_data)
    for table in datasets:
        for _ in range(2):
            sink = StringIO()
            csv.writer(sink).writerows(table)
            sink.seek(0)
            rows = list(csv.reader(sink))
def workload():
    """Serialize and re-parse every dataset with rsv.

    Two round trips per dataset: the first reads back via rsv.load, the
    second via rsv.load_split, so both decode paths are timed.
    """
    # NOTE(review): rsv works on a bytes buffer here; an earlier comment
    # suggested switching to StringIO — confirm before changing.
    for table in (people_data, sample_data, other_data, more_data, emoji_data):
        packed = BytesIO()
        rsv.dump(table, packed)
        packed.seek(0)
        rows = rsv.load(packed)

        packed = BytesIO()
        rsv.dump(table, packed)
        packed.seek(0)
        rows = rsv.load_split(packed)
def json_workload():
    """Serialize and re-parse every dataset with the json module, twice.

    Two identical write+read round trips per dataset, matching the work
    done by the rsv and csv workloads.
    """
    datasets = (people_data, sample_data, other_data, more_data, emoji_data)
    for table in datasets:
        for _ in range(2):
            sink = StringIO()
            json.dump(table, sink)
            sink.seek(0)
            rows = json.load(sink)
# Time each workload with timeit (1000 iterations each so the numbers are
# directly comparable) and report the json-vs-rsv ratio.
print("Starting test...")
time_json_workload = timeit.timeit(json_workload, number=1000)
print(f"json workload: {time_json_workload}")
time_workload = timeit.timeit(workload, number=1000)
print(f"workload: {time_workload}")
time_csv_workload = timeit.timeit(csv_workload, number=1000)
print(f"csv workload: {time_csv_workload}")
ratio = time_json_workload / time_workload
# Fixed typo in the label: "jason" -> "json".
print(f"json/rsv ratio: {ratio}")
# Best comparison against csv, since rsv is a csv-like format
# json workload: 5.238934499910101
# rsv workload: 5.0847148001194 # mypyc speedups
# rsv workload: 8.26404339983128 # pure python
# csv workload: 2.414228399982676