-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtrain.py
83 lines (67 loc) · 2.83 KB
/
train.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# l5kit
import l5kit, os, albumentations as A
from l5kit.configs import load_config_data
from l5kit.data import LocalDataManager, PERCEPTION_LABELS
from l5kit.rasterization import build_rasterizer
from l5kit.visualization import draw_trajectory, TARGET_POINTS_COLOR
from l5kit.geometry import transform_points
from l5kit.evaluation import write_pred_csv, compute_metrics_csv, read_gt_csv, create_chopped_dataset
from l5kit.evaluation.chop_dataset import MIN_FUTURE_STEPS
from l5kit.evaluation.metrics import neg_multi_log_likelihood, time_displace
from l5kit.evaluation.csv_utils import write_pred_csv
from catalyst import dl
from catalyst.dl import utils
from collections import Counter
import datetime
import matplotlib.pyplot as plt
import numpy as np
import omegaconf
import torch
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.loggers.neptune import NeptuneLogger
from tqdm import tqdm
from model import LyftMultiModel, set_seed
# from logzero import logger
# Call the project's seeding helper (imported from model.py) with a fixed
# seed before any configuration or model object is built.
set_seed(42)

# Hyperparameters
# The l5kit helper returns a plain dict; it is wrapped in an OmegaConf
# DictConfig before being used by the rest of the script.
_config_path = "/data/lyft-motion-prediction-autonomous-vehicles/lyft-config-files/agent_motion_config.yaml"
cfg = omegaconf.DictConfig(load_config_data(_config_path))

# Prefix of the weight file written at the end of the script.
name_for_save = 'Big_training'

_model_params = cfg["model_params"]
epochs = _model_params["epochs"]
learning_rate = _model_params["lr"]
training_percentage = cfg["train_data_loader"]["training_percentage"]
validation_percentage = cfg["val_data_loader"]["validation_percentage"]

# The Neptune token is read from the environment; it is None when the
# variable is unset.
API_KEY = os.getenv('NEPTUNE_API_KEY')

# Hyperparameters recorded with the Neptune experiment.
_run_params = {
    'epoch_nr': epochs,
    'learning_rate': learning_rate,
    'train_size': training_percentage,
    'test_size': validation_percentage,
}
neptune_logger = NeptuneLogger(
    api_key=API_KEY,
    project_name='hvergnes/KaggleResNet',
    params=_run_params,
    tags=['ResNet'],
)
# Build the data manager, rasterizer and model, then restore pretrained weights.
# The dataset root is exported before LocalDataManager() is constructed
# (presumably l5kit reads L5KIT_DATA_FOLDER to locate the data - confirm
# against the l5kit docs).
os.environ["L5KIT_DATA_FOLDER"] = "/data/lyft-motion-prediction-autonomous-vehicles"
dm = LocalDataManager()

# `cfg` was already loaded from this same YAML file in the hyperparameter
# section earlier in the script, so it is reused here instead of parsing the
# file a second time.
rasterizer = build_rasterizer(cfg, dm)
model = LyftMultiModel(dm, cfg, rasterizer)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# map_location remaps the stored tensors onto the device selected above, so a
# checkpoint that was saved from a GPU run can still be loaded on a CPU-only
# machine (without it torch.load tries to restore to the original device).
state_dict = torch.load(cfg["model_params"]["weight_path"], map_location=device)
model.load_state_dict(state_dict)
model.to(device)
# Train the model with PyTorch Lightning and save the final weights.
# Checkpoints track the lowest 'val_loss' and are written with the
# 'lyfnet_' filename prefix.
checkpoint_callback = ModelCheckpoint(
    verbose=0,
    monitor='val_loss',
    mode='min',
    prefix='lyfnet_',
)

trainer = Trainer(
    max_epochs=epochs,
    gradient_clip_val=cfg["model_params"]["gradient_clip_val"],
    logger=neptune_logger,
    checkpoint_callback=checkpoint_callback,
    # limit_val_batches=LIMIT_VAL_BATCHES,
    # Only request a GPU when one is present; the rest of the script already
    # falls back to CPU, and a hard-coded gpus=1 makes the Trainer fail on a
    # CPU-only host.
    gpus=1 if torch.cuda.is_available() else None,
)

# Create the output directory before training starts so a missing folder is
# caught immediately rather than after the whole run, when torch.save would
# otherwise raise and the trained weights would be lost.
save_dir = 'save'
os.makedirs(save_dir, exist_ok=True)

trainer.fit(model)

# The file name is the run name followed by day-of-month and hour ("%d,%H").
time_of_save = datetime.datetime.now().strftime("%d,%H")
torch.save(model.state_dict(), os.path.join(save_dir, name_for_save + time_of_save + '.pt'))