-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathparams.json
106 lines (106 loc) · 2.45 KB
/
params.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
{
"CCS_BQ_MAX": 95,
"IP_MAX": 255,
"PW_MAX": 255,
"SN_MAX": 500,
"STRAND_MAX": 2,
"add_pos_encoding": true,
"allow_ffn_pad": true,
"alpha": 0.6,
"attention_dropout": 0.1,
"attn_win_size": 12,
"band_width": null,
"batch_size": 32768,
"beam_size": 4,
"beta_1": 0.9,
"beta_2": 0.999,
"buffer_size": 1000000,
"ccs_bq_hidden_size": 8,
"condense_transformer_input": true,
"conv_model": "resnet50",
"dataset_config_name": "ccs",
"default_batch_size": 32768,
"default_batch_size_tpu": 32768,
"del_cost": 10,
"distill_alpha": 100000,
"dnabert_desired_hidden_size": 0,
"end_learning_rate": 2.86594e-05,
"epsilon": 1e-06,
"extra_decode_length": 50,
"filter_size": 2048,
"hidden_size": 280,
"init_encoder_stack": true,
"init_nonencoder_layers": true,
"initial_learning_rate": 0.0036246,
"initializer_gain": 1,
"ip_hidden_size": 8,
"label_smoothing": 0.1,
"layer_norm": false,
"layer_postprocess_dropout": 0,
"learning_rate": 2,
"learning_rate_decay_rate": 1,
"learning_rate_warmup_steps": 16000,
"limit": -1,
"logit_loss_identifier": "mean_squared_error",
"loss_function": "alignment_loss",
"loss_reg": 0.1,
"max_length": 100,
"max_passes": 20,
"model_checkpoint_freq": "epoch",
"model_config_name": "transformer_learn_values_distill",
"model_name": "transformer_learn_values_distill",
"n_examples_eval": 30476144,
"n_examples_train": 800569037,
"num_channels": 1,
"num_epochs": 9,
"num_epochs_for_decay": 9,
"num_heads": 2,
"num_hidden_layers": 5,
"optimizer_adam_beta1": 0.9,
"optimizer_adam_beta2": 0.997,
"optimizer_adam_epsilon": 1e-09,
"per_base_hidden_size": 8,
"pw_hidden_size": 8,
"relu_dropout": 0,
"remove_label_gaps": false,
"rezero": true,
"seed": 1,
"sn_hidden_size": 8,
"static_batch": false,
"strand_hidden_size": 2,
"student_alpha": 1,
"student_encoder_layers": [
0,
1,
2,
3,
4
],
"subsample_examples": [],
"teacher_encoder_layers": [
1,
2,
3,
4,
5
],
"temperature": 1,
"tensorboard_update_freq": "batch",
"tpu_scale_factor": 1,
"transformer_input_size": 280,
"transformer_model_size": "base",
"trial": 1,
"use_bases": true,
"use_ccs": true,
"use_ccs_bq": true,
"use_dnabert": false,
"use_ip": true,
"use_pw": true,
"use_sn": true,
"use_strand": true,
"use_tpu": false,
"vocab_size": 5,
"warmup_steps": 0,
"weight_decay_rate": 0.0069868,
"dc_calibration": "0,0.900592,3.073242"
}