-
Notifications
You must be signed in to change notification settings - Fork 1
/
smote.py
34 lines (25 loc) · 1.05 KB
/
smote.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
from imblearn.over_sampling import SMOTE
import numpy as np
import warnings
from constants import NUM_SLEEP_STAGES
from utils import remove_nan
# Oversample each per-label dataset with SMOTE and write the result next to
# the input file.
#
# NOTE(review): this silences every warning category for the whole run,
# which also hides NumPy / scikit-learn deprecation warnings. Consider
# narrowing the filter once the noisy warnings have been identified.
warnings.filterwarnings("ignore")

# Baseline number of samples requested per label after resampling.
n = 25000
# Keys are hard-coded to labels 0-5, so the loop below raises KeyError if
# NUM_SLEEP_STAGES is greater than 6.
desired_samples = {0: n, 1: n, 2: n, 3: n, 4: n, 5: n}

for i in range(NUM_SLEEP_STAGES):
    print(f"LABEL {i}")
    # Column 0 is read as the label and column 1 as the feature vector;
    # allow_pickle is needed because the rows are stored as Python objects.
    data = np.load(f'/content/cleaned_data/clf{i}.npy', allow_pickle=True)
    # np.float was removed in NumPy 1.24; float64 is the type it aliased.
    X = np.array(list(data[:, 1]), dtype=np.float64)
    y = np.array(data[:, 0]).astype('int')

    # Label i is requested at five times the baseline for its own file.
    # Working on a copy keeps desired_samples intact even if resampling fails.
    targets = dict(desired_samples)
    targets[i] = desired_samples[i] * 5

    print(f"y: {np.unique(y, return_counts=True)}")
    sm = SMOTE(random_state=42, sampling_strategy=targets)
    X_smote, y_smote = sm.fit_resample(X, y)
    print(f"y_smote: {np.unique(y_smote, return_counts=True)}")
    print("###########################################################")

    # Store (label, features) rows in an explicit object array. Handing
    # np.save a ragged list of tuples relies on implicit object-array
    # creation, which raises ValueError on NumPy >= 1.24.
    packed = np.empty((len(y_smote), 2), dtype=object)
    for row, (label, features) in enumerate(zip(y_smote, X_smote)):
        packed[row, 0] = label
        packed[row, 1] = features
    np.save(f"/content/cleaned_data/clf_smote{i}.npy", packed)