forked from Graph-COM/Neural_Higher-order_Pattern_Prediction
-
Notifications
You must be signed in to change notification settings - Fork 0
/
cut_dataset.py
29 lines (19 loc) · 772 Bytes
/
cut_dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
"""Cut the large dataset.

Loads the saved triplet groups, keeps the closed triangles, open triangles,
wedges and node set untouched, and replaces the negative samples with a
random 1-in-``p`` subsample before writing the result back next to the input.
"""
import pickle

import numpy as np

# Directory that holds triplets_ori.npy (input) and receives triplets.npy.
file_path = './saved_triplets/threads-ask-ubuntu/threads-ask-ubuntu_0.4_0.1'
# Keep one negative sample out of every `p`.
p = 1000


def subsample_negatives(nega, p, rng=None):
    """Return a random 1-in-``p`` subsample of the negative samples.

    Args:
        nega: Indexable holding (at least) five parallel NumPy arrays of
            equal length; entries 0..4 are subsampled with the same indices
            so the columns stay aligned.
        p: Positive integer; ``len(nega[0]) // p`` rows are kept.
        rng: Optional object exposing ``choice`` (``np.random.RandomState``
            or ``np.random.Generator``). Defaults to the global ``np.random``
            state, as the original script used.

    Returns:
        A list of five arrays holding the selected rows, in their original
        relative order.

    Raises:
        ValueError: If ``p`` is smaller than 1.
    """
    if p < 1:
        raise ValueError(f"p must be a positive integer, got {p!r}")
    chooser = np.random if rng is None else rng
    n = len(nega[0])
    picked = chooser.choice(n, n // p, replace=False)
    # Sort the drawn indices so the subsample keeps the source ordering
    # instead of being shuffled (the original computed the sorted indices
    # but then indexed with the unsorted ones).
    picked = np.sort(picked, kind='quicksort')
    return [nega[col][picked] for col in range(5)]


def pack_triplets(*groups):
    """Pack arbitrary (ragged) groups into a 1-D object array, one per slot.

    ``np.array([...])`` on ragged input raises ``ValueError`` on recent NumPy
    releases, so the container is allocated explicitly and filled slot by
    slot; every group is stored as-is.
    """
    packed = np.empty(len(groups), dtype=object)
    for slot, group in enumerate(groups):
        packed[slot] = group
    return packed


def cut_dataset(dataset_dir, keep_one_in):
    """Read ``triplets_ori.npy``, subsample negatives, write ``triplets.npy``.

    Args:
        dataset_dir: Directory containing ``triplets_ori.npy``; the output
            ``triplets.npy`` is written into the same directory.
        keep_one_in: Subsampling factor forwarded to ``subsample_negatives``.
    """
    # SECURITY: pickle.load can execute arbitrary code; only run this on
    # files produced by a trusted pipeline.
    # NOTE(review): despite the .npy suffix the input is read with pickle,
    # so it is presumably a raw pickle dump -- confirm against its writer.
    with open(dataset_dir + '/triplets_ori.npy', 'rb') as f:
        x = pickle.load(f)
    cls_tri, opn_tri, wedge, nega, set_all_nodes = x[0], x[1], x[2], x[3], x[4]

    print("close tri", len(cls_tri[0]))
    print("open tri", len(opn_tri[0]))
    print("wedge", len(wedge[0]))
    print("nega", len(nega[0]))

    nega_new = subsample_negatives(nega, keep_one_in)

    with open(dataset_dir + '/triplets.npy', 'wb') as f:
        np.save(f, pack_triplets(cls_tri, opn_tri, wedge, nega_new, set_all_nodes))


if __name__ == "__main__":
    cut_dataset(file_path, p)