forked from wesg52/sparse-probing-paper
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutils.py
59 lines (51 loc) · 1.67 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import datetime
import random
import numpy as np
import torch
import argparse
import time
def seed_all(seed):
    """Seed every random number generator used by this module's imports.

    The same value is passed, unchanged, to Python's ``random`` module,
    NumPy's global generator, and PyTorch's CPU and CUDA generators.

    Args:
        seed: Seed value handed to each generator.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed all CUDA devices rather than only the current one, so multi-GPU
    # runs are covered too. PyTorch documents this call as silently ignored
    # when CUDA is not available.
    torch.cuda.manual_seed_all(seed)
def timestamp():
    """Return the current local time as a ``YYYY:MM:DD:HH:MM:SS`` string."""
    now = datetime.datetime.now()
    return now.strftime("%Y:%m:%d:%H:%M:%S")
def default_argument_parser():
    """Build the command-line parser shared by experiment scripts.

    The parser shows defaults in ``--help`` output and registers the common
    experiment options. ``--experiment_type`` is the only required option.
    The ``--experiment_name`` default is derived from the clock at the time
    this function is called.

    Returns:
        argparse.ArgumentParser: Parser with the experiment options added;
        callers may add further arguments before parsing.
    """
    cli = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    # experiment params
    # Each entry is (flag, add_argument keyword options), kept in the order
    # they should appear in the help output.
    option_specs = (
        ('--experiment_name', {
            'default': str(int(time.time()) // 10),
            'help': 'Name of experiment to save',
        }),
        ('--experiment_type', {
            'nargs': '+',
            'required': True,
            'help': 'The inner loop function(s) to run for the experiment',
        }),
        ('--model', {
            'default': 'pythia-70m',
            'help': 'Name of model from TransformerLens',
        }),
        ('--feature_dataset', {
            'help': 'Name of cached feature dataset',
        }),
        ('--probe_location', {
            'default': 'mlp.hook_post',
            'help': 'Model component to probe',
        }),
        ('--activation_aggregation', {
            'default': None,
            'help': 'Average activations across all tokens in a sequence',
        }),
        ('--seed', {
            'default': 1,
            'type': int,
            'help': 'Random seed for experiment',
        }),
        ('--probe_next_token_feature', {
            'action': 'store_true',
            'help': 'Probe the token before the probe_index to predict property of the probe_index',
        }),
    )
    for flag, options in option_specs:
        cli.add_argument(flag, **options)

    return cli
# Layer count for each Pythia model size, keyed by model name
# (the same naming form as the 'pythia-70m' default of --model).
MODEL_N_LAYERS = {
    'pythia-70m': 6,
    'pythia-160m': 12,
    'pythia-410m': 24,
    'pythia-1b': 16,
    'pythia-1.4b': 24,
    'pythia-2.8b': 32,
    'pythia-6.9b': 32,
}