-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathsqliteToPickle.py
executable file
·69 lines (49 loc) · 1.34 KB
/
sqliteToPickle.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#!/usr/bin/env python3
import sys
import sqlite3
import json
import time
import os
import pickle
# Only rows whose `numperms` column equals this value are exported.
MINHASH_PERMS = 64
# NOTE(review): THRESHOLD and VERBOSE are not referenced anywhere in this
# script; they are kept so the module's names stay unchanged.
THRESHOLD = 0.5
VERBOSE = False


def _load_minhash_index(dbpath, numperms=MINHASH_PERMS):
    """Build the inverted minhash index stored in the sqlite db at *dbpath*.

    Reads every row of the ``funcminhash`` table whose ``numperms`` column
    equals *numperms*. Each row's ``hashvals`` column is a comma-separated
    list of integers.

    Returns:
        A ``(minhashdb, allfilefuncs)`` tuple where ``minhashdb`` maps each
        integer hash value to the set of ``"filename:fname"`` strings whose
        row contains that value, and ``allfilefuncs`` is the set of every
        ``"filename:fname"`` string seen.

    Raises:
        sqlite3.Error: if the database cannot be queried.
        ValueError: if a ``hashvals`` entry is not an integer.
    """
    minhashdb = {}
    allfilefuncs = set()
    con = sqlite3.connect(dbpath)
    try:
        rows = con.execute(
            "SELECT filename,fname,hashvals FROM funcminhash WHERE numperms=?",
            (numperms,),
        )
        for filename, fname, hashval_str in rows:
            fname_filename = filename + ":" + fname
            allfilefuncs.add(fname_filename)
            for token in hashval_str.split(','):
                minhashdb.setdefault(int(token), set()).add(fname_filename)
    finally:
        # Release the database handle even when the query or parsing fails.
        con.close()
    return minhashdb, allfilefuncs


def main(argv):
    """Convert a sqlite minhash db into a pickled inverted index.

    Args:
        argv: command-line vector, ``[prog, dbpath, [outfile]]``. When
            ``outfile`` is omitted the pickle is written to the current
            directory as ``<basename of dbpath>.pkl``.

    Returns:
        Process exit status: 0 on success, 1 on a usage error or when the
        database file does not exist.
    """
    if len(argv) < 2:
        print("missing path to db file")
        print('usage: %s <path to sqlite3 db> [outfile]' % argv[0])
        return 1

    dbpath = argv[1]
    outfile = argv[2] if len(argv) >= 3 else None

    # sqlite3.connect() would silently create an empty database for a
    # mistyped path, so check for the file before connecting.
    if not os.path.isfile(dbpath):
        print(f"db file not found: {dbpath}")
        return 1

    start = time.time()
    minhashdb, allfilefuncs = _load_minhash_index(dbpath)
    print(f"finished loading minhashdb, elapsed {time.time() - start}")
    print(f"{len(allfilefuncs)} filename:funcname total")

    if outfile is None:
        outfile = os.path.basename(dbpath) + ".pkl"

    with open(outfile, "wb") as f:
        # Stream straight to the file instead of building the whole byte
        # string in memory first.
        pickle.dump(minhashdb, f)

    print(f"stored pickle file at {outfile}, elapsed {time.time() - start}")
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))