-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathconfig.py
127 lines (114 loc) · 4.24 KB
/
config.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import tensorflow as tf
from pathlib import Path
from transformers import AutoTokenizer
class Config():
    """
    Class for holding the configuration of a run, and setting up the accelerator strategy.

    Arguments:
        - model_name: Title of model from the HuggingFace directory of transformers.
        - translation: Boolean determining whether non-English data should be translated using
                       the Google Translate library. Used for transformers that were trained
                       exclusively on English data.
        - max_length: Maximum length of sequences processed by the transformer. The longer, the
                      more long-term information can be learned, but takes up more memory.
        - padding: Boolean determining whether tokenizer should pad sequences to max_length.
        - batch_size
        - epochs
        - learning_rate
        - metrics: metrics to be logged.
        - verbose
        - train_splits: how many folds to perform stratified cross-validation with.
        - accelerator: 'TPU' or 'GPU'
        - random_seed

    Attributes set by ``initialize_accelerator`` (called at the end of ``__init__``):
        - strategy: the ``tf.distribute`` strategy in use (TPU strategy or the default one).
        - tpu: the TPU cluster resolver when a TPU was initialized, otherwise ``None``.
        - AUTO: ``tf.data.experimental.AUTOTUNE``.
        - REPLICAS: ``strategy.num_replicas_in_sync``.
    """

    def __init__(
        self,
        model_name: str,
        translation: bool = True,
        max_length: int = 64,
        padding: bool = True,
        batch_size: int = 128,
        epochs: int = 5,
        learning_rate: float = 1e-5,
        # Immutable default: a list literal here would be shared by every instance.
        metrics=("sparse_categorical_accuracy",),
        verbose: int = 1,
        train_splits: int = 5,
        accelerator: str = "TPU",
        random_seed: int = 6
    ):
        # Set up
        self.SEED = random_seed
        self.ACCELERATOR = accelerator
        self.PATH_TRAIN = Path("data/train.csv")
        self.PATH_TEST = Path("data/test.csv")
        self.TRAIN_SPLITS = train_splits

        # Model configuration
        self.MODEL_NAME = model_name
        self.TRANSLATION = translation
        self.TOKENIZER = AutoTokenizer.from_pretrained(self.MODEL_NAME)

        # Model hyperparameters
        self.MAX_LENGTH = max_length
        self.PAD_TO_MAX_LENGTH = padding
        self.BATCH_SIZE = batch_size
        self.EPOCHS = epochs
        self.LEARNING_RATE = learning_rate
        # Tuples (including the default) become a fresh list per instance so that
        # METRICS stays a list as before; any other value is stored untouched.
        self.METRICS = list(metrics) if isinstance(metrics, tuple) else metrics
        self.VERBOSE = verbose

        # Language maps
        self.LANGUAGE_MAP = {
            "English"   : 0,
            "Chinese"   : 1,
            "Arabic"    : 2,
            "French"    : 3,
            "Swahili"   : 4,
            "Urdu"      : 5,
            "Vietnamese": 6,
            "Russian"   : 7,
            "Hindi"     : 8,
            "Greek"     : 9,
            "Thai"      : 10,
            "Spanish"   : 11,
            "German"    : 12,
            "Turkish"   : 13,
            "Bulgarian" : 14
        }
        self.INVERSE_LANGUAGE_MAP = {v: k for k, v in self.LANGUAGE_MAP.items()}

        # Initializing accelerator
        self.initialize_accelerator()

    def initialize_accelerator(self):
        """
        Method for initializing accelerator strategy (TPU or GPU).

        When ``self.ACCELERATOR`` is "TPU", tries to connect to and initialize a TPU.
        If either step fails, ``self.ACCELERATOR`` is switched to "GPU" and the default
        ``tf.distribute`` strategy is used instead, so ``self.strategy`` is always set
        when this method returns.
        """
        strategy = None
        # Always define the attribute so callers can test `config.tpu is None`.
        self.tpu = None

        # Checking TPU first
        if self.ACCELERATOR == "TPU":
            print("Connecting to TPU")
            try:
                tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
                print(f"Running on TPU {tpu.master()}")
            except ValueError:
                print("Could not connect to TPU")
                tpu = None

            if tpu is not None:
                try:
                    print("Initializing TPU")
                    tf.config.experimental_connect_to_cluster(tpu)
                    tf.tpu.experimental.initialize_tpu_system(tpu)
                    strategy = tf.distribute.experimental.TPUStrategy(tpu)
                    self.tpu = tpu
                    print("TPU initialized")
                except Exception as err:
                    # Broad on purpose: this is a best-effort step and any failure
                    # here should fall back to the default strategy below.
                    print("Failed to initialize TPU")
                    print(f"Reason: {err!r}")
            else:
                print("Unable to initialize TPU")

            # No usable TPU strategy: record the fallback accelerator.
            if strategy is None:
                self.ACCELERATOR = "GPU"

        # Default for CPU and GPU otherwise (also the fallback after a TPU failure)
        if strategy is None:
            print("Using default strategy for CPU and single GPU")
            strategy = tf.distribute.get_strategy()
        self.strategy = strategy

        # Checking GPUs
        if self.ACCELERATOR == "GPU":
            print(f"GPUs Available: {len(tf.config.experimental.list_physical_devices('GPU'))}")

        # Defining replicas
        self.AUTO = tf.data.experimental.AUTOTUNE
        self.REPLICAS = self.strategy.num_replicas_in_sync
        print(f"REPLICAS: {self.REPLICAS}")