forked from pbashivan/EEGLearn
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutils.py
132 lines (111 loc) · 4.97 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
__author__ = 'Pouya Bashivan'
import math as m
import numpy as np
np.random.seed(123)
import scipy.io
from sklearn.decomposition import PCA
def cart2sph(x, y, z):
    """
    Convert a Cartesian point to spherical coordinates.

    :param x: X coordinate
    :param y: Y coordinate
    :param z: Z coordinate
    :return: tuple (radius, elevation, azimuth); both angles are in radians,
             as returned by math.atan2
    """
    # Squared length of the projection onto the XY plane; used twice below.
    planar_sq = x**2 + y**2
    radius = m.sqrt(planar_sq + z**2)
    # Elevation is measured from the XY plane towards +Z.
    elevation = m.atan2(z, m.sqrt(planar_sq))
    # Azimuth is measured in the XY plane from the +X axis.
    azimuth = m.atan2(y, x)
    return radius, elevation, azimuth
def pol2cart(theta, rho):
    """
    Convert a polar point to Cartesian coordinates.

    :param theta: angle value (radians, as consumed by math.cos / math.sin)
    :param rho: radius value
    :return: tuple (X, Y)
    """
    x_coord = rho * m.cos(theta)
    y_coord = rho * m.sin(theta)
    return x_coord, y_coord
def augment_EEG(data, stdMult, pca=False, n_components=2):
    """
    Augment data by adding normal noise to each feature.

    :param data: EEG feature data as a matrix (n_samples x n_features)
    :param stdMult: Multiplier for std of added noise
    :param pca: if True, fit a PCA on data and add noise along the fitted
                principal components, weighted by their explained variance
                ratio. Otherwise add independent Gaussian noise per feature.
    :param n_components: Number of components to consider when using PCA
                         (forwarded unchanged to the PCA constructor).
    :return: Augmented data as a new float matrix (n_samples x n_features);
             the input is not modified.
    """
    data = np.asarray(data)
    augData = np.zeros(data.shape)
    if pca:
        # Use a separate name so the boolean `pca` flag is not shadowed by
        # the estimator object.
        model = PCA(n_components=n_components)
        model.fit(data)
        components = model.components_
        variances = model.explained_variance_ratio_
        # Size the random draw by the number of *fitted* components rather
        # than the constructor argument, so the noise lines up with
        # `components` even when n_components is None or a float fraction.
        coeffs = np.random.normal(scale=stdMult, size=components.shape[0]) * variances
        # The offset does not depend on the sample, so compute it once
        # instead of once per row.
        offset = (components * coeffs[:, np.newaxis]).sum(axis=0)
        # NOTE(review): a single noise vector is drawn and added to every
        # sample (this matches the previous behaviour). Confirm whether
        # per-sample coefficients were intended.
        augData[:] = data + offset
    else:
        # Add Gaussian noise with std determined by weighted std of each feature
        for f, feat in enumerate(data.transpose()):
            augData[:, f] = feat + np.random.normal(scale=stdMult * np.std(feat), size=feat.size)
    return augData
def augment_EEG_image(image, stdMult, pca=False, n_components=2):
    """
    Augment EEG images by adding normal noise to each color channel.

    Each color channel is flattened to a (n_samples x W*H) matrix, passed
    through augment_EEG, and the result is reshaped back to the input shape.

    :param image: EEG feature data as a colored image [n_samples, n_colors, W, H]
    :param stdMult: Multiplier for std of added noise
    :param pca: if True will perform PCA on data and add noise proportional to PCA components.
    :param n_components: Number of components to consider when using PCA.
    :return: Augmented data with the same shape as `image`
             [n_samples, n_colors, W, H]
    """
    # The previous body referenced an undefined `data` (and indexed it with
    # 'featMat'); the array to augment is the `image` argument itself.
    n_samples, n_colors = image.shape[0], image.shape[1]
    augData = np.zeros((n_samples, n_colors, image.shape[2] * image.shape[3]))
    for c in range(n_colors):
        # Flatten the spatial dimensions of this channel into a feature axis.
        reshData = np.reshape(image[:, c, :, :], (n_samples, -1))
        if pca:
            augData[:, c, :] = augment_EEG(reshData, stdMult, pca=True, n_components=n_components)
        else:
            augData[:, c, :] = augment_EEG(reshData, stdMult, pca=False)
    return np.reshape(augData, image.shape)
def load_data(data_file):
    """
    Loads the data from MAT file. MAT file should contain two
    variables. 'featMat' which contains the feature matrix and 'labels'
    which contains the output labels as a vector. Label numbers are
    assumed to start from 1 and are shifted to start from 0.

    Samples are returned in the order stored in the file (no shuffling).

    Parameters
    ----------
    data_file: str
        Path of the MAT file to read.

    Returns
    -------
    data: array_like
        The 'featMat' variable exactly as loaded.
    labels: array_like
        The transposed 'labels' variable minus 1 (zero-based labels).
    """
    print("Loading data from %s" % (data_file))
    dataMat = scipy.io.loadmat(data_file, mat_dtype=True)
    print("Data loading complete. Shape is %r" % (dataMat['featMat'].shape,))
    # 'labels' is transposed before the 1 -> 0 based shift, so a row vector
    # stored in the file comes back as a column.
    return dataMat['featMat'], dataMat['labels'].T - 1  # Sequential indices
def reformatInput(data, labels, indices):
    """
    Receives the indices for train and test datasets.
    Outputs the train, validation, and test data and label datasets.

    The validation set is carved out of the front of the training indices
    and has the same length as the test set. No shuffling is performed.

    :param data: 4-D array indexed by sample on axis 0, or 5-D array indexed
                 by sample on axis 1
    :param labels: label array indexed by sample on axis 0
    :param indices: pair (train_indices, test_indices)
    :return: list [(train_data, train_labels), (valid_data, valid_labels),
             (test_data, test_labels)]; labels are squeezed and cast to int32
    :raises ValueError: if data is neither 4- nor 5-dimensional
    """
    if data.ndim == 4:
        def select(idx):
            return data[idx]
    elif data.ndim == 5:
        def select(idx):
            return data[:, idx]
    else:
        # Previously this case fell off the end and returned None, which only
        # surfaced later as an unpacking error in the caller.
        raise ValueError("data must be 4- or 5-dimensional, got ndim=%d" % data.ndim)

    n_test = len(indices[1])
    # Order matters: callers receive train, validation, test.
    splits = (indices[0][n_test:], indices[0][:n_test], indices[1])
    return [(select(idx), np.squeeze(labels[idx]).astype(np.int32)) for idx in splits]
if __name__ == '__main__':
    # Smoke demo: augment a random (100 x 10) matrix with PCA-based noise.
    data = np.random.normal(size=(100, 10))
    # Parenthesised single-argument print works on both Python 2 and 3 and
    # matches the print(...) calls used in load_data.
    print('Original: {0}'.format(data))
    print('Augmented: {0}'.format(augment_EEG(data, 0.1, pca=True)))