-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpca.py
137 lines (98 loc) · 4.51 KB
/
pca.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import json
import os
import re
prompt_directory = "processed_outputs"
activations_directory = "activations_4"
solvers = ["RealGreedy e=0", "AntiGreedy e=0"] # which solvers we have
num_prompts = 400 # how many prompts we have of each
num_layers = 32 # how many layers the model has
def make_pca_graph(activations, answers_array, labels, layer):
# do the PCA
pca = PCA(2)
x = StandardScaler().fit_transform(activations)
principalComponents = pca.fit_transform(x)
print(f"Layer {layer}: {pca.explained_variance_ratio_}")
# make the dataframe
principalDf = pd.DataFrame(data = principalComponents, columns = ['principal component 1', 'principal component 2'])
labels = np.stack((answers_array, labels), axis=1)
finalDf = pd.concat([principalDf, pd.DataFrame(data = labels, columns = ["answers", "labels"])], axis = 1)
# make the graph
fig = plt.figure(figsize = (8,8))
ax = fig.add_subplot(1,1,1)
ax.set_xlabel('Principal Component 1', fontsize = 15)
ax.set_ylabel('Principal Component 2', fontsize = 15)
ax.set_title('2 component PCA', fontsize = 20)
markers = ["o", "^"]
targets = ["Greedy e=0", "AntiGreedy e=0"]
color_names = ['blue', 'green', 'red', 'yellow', 'purple']
colors_rgb = [(0.0, 0.15294117647058825, 1.0), (0.0, 0.8431372549019608, 0.0), (0.9294117647058824, 0.12156862745098039, 0.0),
(0.9294117647058824, 0.9803921568627451, 0.0), (0.5294117647058824, 0.1607843137254902, 1.0)]
for target, marker in zip(targets, markers):
for color, rgb_val in zip(color_names, colors_rgb):
indicesToKeep = finalDf[(finalDf['labels'] == target) & (finalDf['answers'] == color)]
if len(indicesToKeep) > 0:
handle = ax.scatter(indicesToKeep['principal component 1']
, indicesToKeep['principal component 2']
, color = rgb_val
, marker = marker
, s = 50)
ax.grid()
plt.savefig(f"results_layer_{layer}.png", dpi = 500)
plt.close()
def do_pca(activations, n_components):
# do the PCA
pca = PCA(n_components)
x = StandardScaler().fit_transform(activations)
principalComponents = pca.fit_transform(x)
return principalComponents
def main1():
labels = ["Greedy e=0"]*num_prompts + ["AntiGreedy e=0"]*num_prompts
for i in range(num_layers):
activations_array = []
answers_array = []
# absolute prompt number
n=0
for solver in solvers:
for j in range(num_prompts):
activations_path = os.path.join(activations_directory, f"prompt_{n}_layer_{i}.npy")
act = np.load(activations_path).squeeze() # we cut out the first dimension immediately (because it's just 1)
activations_array.append(act)
# get the color from the prompt
prompt_filename = os.path.join(prompt_directory, f"{solver} {j}.json")
with open(prompt_filename, "r") as f:
prompt = json.load(f)
answer = prompt[1]['content']
# make a regex to extract the answer
pattern = r"<Answer>(.*?)</Answer>"
matches = re.findall(pattern, answer)
color = matches[0].strip().lower()
answers_array.append(color)
n+=1
activations = np.stack(activations_array)
make_pca_graph(activations, answers_array, labels, i)
def main2():
pc_values = []
for i in range(num_layers):
activations_array = []
answers_array = []
# absolute prompt number
n=0
for solver in solvers:
for j in range(num_prompts):
activations_path = os.path.join(activations_directory ,f"prompt_{n}_layer_{i}.npy")
act = np.load(activations_path).squeeze() # we cut out the first dimension immediately (because it's just 1)
activations_array.append(act)
n+=1
activations = np.stack(activations_array)
pc_values.append(do_pca(activations, 6))
# saving pc_values
pc_values = np.array(pc_values) # (32,52,5)
np.save(f'pc_values', pc_values)
if __name__ == "__main__":
# main1()
main2()