-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy path04_extract_embeddings.py
94 lines (70 loc) · 4.58 KB
/
04_extract_embeddings.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
#Author: Dimitris Spathis (ds806@cl.cam.ac.uk)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from keras import *
from keras import backend as K
from keras.models import model_from_json, Model
import glob
from tqdm import tqdm
from utils import *
K.set_learning_phase(0)
#folder of the best pre-trained model (or replace with own)
folder = '20200115-105719'
X_train_timeseries_normalized = np.load('data/X_train_timeseries_normalized.npy')
X_train_timeseries_normalized = X_train_timeseries_normalized[:,:,6:] #select only ACC features (index here: data/feature_names/timeseries_feature_names.csv)
X_train_demographics_normalized = np.load('data/X_train_demographics_normalized.npy')
X_train_demographics_normalized_resting = X_train_demographics_normalized[:,0]
X_train_demographics_normalized_temporal = X_train_demographics_normalized[:,-4:]
X_test_timeseries_normalized = np.load('data/X_test_timeseries_normalized.npy')
X_test_timeseries_normalized = X_test_timeseries_normalized[:,:,6:] #select only ACC features (index here: data/feature_names/timeseries_feature_names.csv)
X_test_demographics_normalized = np.load('data/X_test_demographics_normalized.npy')
X_test_demographics_normalized_resting = X_test_demographics_normalized[:,0]
X_test_demographics_normalized_temporal = X_test_demographics_normalized[:,-4:]
model, loss = modular_model(X_train_timeseries_normalized.shape[2], 4, "quantile") #for a quantile model
#model, loss = modular_model(X_train_timeseries_normalized.shape[2], 5, "mse") #for vanilla mse
#load best model from the folder
model = model_from_json(open('./models/'+ folder +'/model_architecture.json').read()) #custom_objects={'exp': Activation(exponential)})
files = glob.glob('./models/'+folder+'/*.hdf5')
#WARNING! DEPENDS ON THE NAMING CONVENTION!
#we parse the filename ('./models/20191029-125927/weights-regression-improvement-51.03.hdf5')
#and we convert the MSE to a float in order to sort, the first is the lowest (lowest val_loss)
weights = sorted(files, key=lambda name: float(name[56:-5]))[0]
print ("=============Best model loaded:", weights)
model.load_weights(weights)
model.compile(loss=[loss], optimizer="adam")
model.summary()
print (model.layers)
print (model.input) #input layers (if multimodal, >1)
print (model.layers[-2:][0].output) #embeddings/penultimate layer (architecture-dependent)
#step2heart (A/R/T)
layer_activations = K.function([*model.input, K.learning_phase()],
[model.layers[-2:][0].output])
#a unimodal model (e.g. the autoencoder) can use a simpler input (no *)
#layer_activations = K.function([model.input, K.learning_phase()],
# [model.layers[5].output])
print (len(X_test_timeseries_normalized), len(X_train_timeseries_normalized))
activations_test=[]
for i in tqdm(range(0,len(X_test_timeseries_normalized), 197)): #197 (step) is a proper divisor of 22458 (test set size)
#we need to pass each input tensor individually
activations_test.append(layer_activations([X_test_timeseries_normalized[i:i+197],
X_test_demographics_normalized_temporal[i:i+197],
np.expand_dims(X_test_demographics_normalized_resting[i:i+197], axis=1),
]))
activations_train=[]
for i in tqdm(range(0,len(X_train_timeseries_normalized), 192)): #192 (step) is a proper divisor of 88320 (train set size)
#we need to pass each input tensor individually
activations_train.append(layer_activations([X_train_timeseries_normalized[i:i+192],
X_train_demographics_normalized_temporal[i:i+192],
np.expand_dims(X_train_demographics_normalized_resting[i:i+192], axis=1),
]))
print (np.squeeze(np.array(activations_train)).shape, np.squeeze(np.array(activations_test)).shape)
#[loops, steps, activation_dim] -> [loops*steps, activation_dim]
reshaped_arr_train = np.squeeze(np.array(activations_train)).reshape(len(X_train_timeseries_normalized), np.array(activations_train).shape[3]) #the dim of the output layer
reshaped_arr_train.shape
#[loops, steps, activation_dim] -> [loops*steps, activation_dim]
reshaped_arr_test = np.squeeze(np.array(activations_test)).reshape(len(X_test_timeseries_normalized), np.array(activations_test).shape[3]) #the dim of the output layer
reshaped_arr_test.shape
np.save('data/X_train_activations'+ folder +'.npy', reshaped_arr_train)
np.save('data/X_test_activations'+ folder +'.npy', reshaped_arr_test)