-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdataset.py
78 lines (61 loc) · 2.68 KB
/
dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import torch
from torch.utils.data import Dataset
import torchvision
import numpy as np
import cfg
import os
from PIL import Image
import math
# Directory that image paths listed in the label file are joined onto.
IMG_BASE_DIR = "data01"
# Annotation file read by MyDataset.__init__, one sample per line.
LABEL_FILE_PATH = "data01/label.txt"

# Preprocessing applied to every letterboxed PIL image before it is returned.
transforms = torchvision.transforms.Compose([torchvision.transforms.ToTensor()])
def one_hot(cls_num, v):
    """Return a float vector of length ``cls_num`` set to 1.0 at index ``v``.

    Every other position is 0.0. ``v`` is used directly as a NumPy index.
    """
    encoded = np.zeros(cls_num, dtype=np.float64)
    encoded[v] = 1.0
    return encoded
class MyDataset(Dataset):
    """Dataset producing letterboxed images and per-feature-map target arrays.

    Each non-blank line of ``LABEL_FILE_PATH`` is split on whitespace: the
    first token is an image path relative to ``IMG_BASE_DIR`` and the rest
    are read as consecutive groups of five numbers ``cls cx cy w h``.
    """

    def __init__(self):
        """Load every non-blank annotation line from ``LABEL_FILE_PATH``."""
        super().__init__()
        with open(LABEL_FILE_PATH) as f:
            # Blank lines (e.g. a trailing empty line) carry no sample and
            # would otherwise fail in __getitem__ when reading the image path.
            self.dataset = [line for line in f if line.strip()]

    @staticmethod
    def _shape_iou(box_w, box_h, anchor_w, anchor_h, anchor_area):
        """Return the IoU of a box and an anchor compared by size only.

        The overlap is ``min(widths) * min(heights)``, i.e. both rectangles
        are treated as sharing a corner. ``anchor_area`` is passed in rather
        than recomputed so the caller's precomputed areas are honoured.
        """
        intersection = np.minimum(anchor_w, box_w) * np.minimum(anchor_h, box_h)
        return intersection / (box_w * box_h + anchor_area - intersection)

    def make_squre(self, im, max_size=416):
        """Resize an image to fit a square canvas, keeping its aspect ratio.

        Args:
            im: Path or file object accepted by ``PIL.Image.open``.
            max_size: Side length of the square output canvas in pixels.

        Returns:
            A tuple ``(new_im, scale, dx, dy)``: the ``max_size`` x
            ``max_size`` RGB canvas with the resized image pasted in the
            centre, the factor the original dimensions were divided by, and
            the paste offsets of the resized image inside the canvas.
        """
        # The context manager closes the underlying file once the resized
        # copy has been produced.
        with Image.open(im) as source:
            x, y = source.size
            # Derive the scale from max_size so non-default sizes work too.
            scale = max(x, y) / max_size
            size = (int(x / scale), int(y / scale))
            resize_im = source.resize(size, 1)  # 1 is Pillow's LANCZOS filter
        dx = round((max_size - size[0]) / 2)
        dy = round((max_size - size[1]) / 2)
        new_im = Image.new("RGB", (max_size, max_size))
        new_im.paste(resize_im, (dx, dy))
        return new_im, scale, dx, dy

    def __len__(self):
        """Return the number of annotation lines loaded."""
        return len(self.dataset)

    def __getitem__(self, index):
        """Build the image tensor and target arrays for one annotation line.

        For every ``feature_size`` in ``cfg.ANCHORS_GROUP`` a zero array of
        shape ``(feature_size, feature_size, 3, 5 + cfg.CLASS_NUM)`` is
        created. For each box, the cell containing its letterboxed centre
        receives, per anchor, the vector
        ``[iou, cx_offset, cy_offset, log(p_w), log(p_h), *one_hot_class]``.

        Returns:
            ``(labels[13], labels[26], labels[52], img_data)`` where
            ``img_data`` is the letterboxed image passed through the
            module-level ``transforms``.

        Raises:
            ValueError: If the numbers after the image path are not a
                multiple of five.
        """
        strs = self.dataset[index].split()
        _img_data, scale, dx, dy = self.make_squre(os.path.join(IMG_BASE_DIR, strs[0]))
        img_data = transforms(_img_data)

        # reshape(-1, 5) yields zero rows for an image without boxes, so such
        # samples produce all-zero targets instead of raising.
        boxes = np.array([float(value) for value in strs[1:]]).reshape(-1, 5)

        labels = {}
        for feature_size, anchors in cfg.ANCHORS_GROUP.items():
            target = np.zeros(shape=(feature_size, feature_size, 3, 5 + cfg.CLASS_NUM))
            labels[feature_size] = target
            anchor_areas = cfg.ANCHORS_GROUP_AREA[feature_size]

            for cls, cx, cy, raw_w, raw_h in boxes:
                # Map the box centre into letterboxed coordinates, then into
                # grid units; modf splits it into in-cell offset and index.
                cx_offset, cx_index = math.modf(
                    (cx / scale + dx) * feature_size / cfg.IMG_WIDTH)
                cy_offset, cy_index = math.modf(
                    (cy / scale + dy) * feature_size / cfg.IMG_WIDTH)
                w = raw_w / scale
                h = raw_h / scale
                class_vector = one_hot(cfg.CLASS_NUM, int(cls))

                for i, anchor in enumerate(anchors):
                    # NOTE(review): anchors are divided by the same scale as
                    # the box, so p_w/p_h equal the unscaled box size over the
                    # configured anchor size - confirm this is intended.
                    anchor_w = anchor[0] / scale
                    anchor_h = anchor[1] / scale
                    anchor_area = anchor_areas[i] / (scale ** 2)
                    p_w, p_h = w / anchor_w, h / anchor_h
                    # Height overlap must use the anchor height (anchor[1]).
                    iou = self._shape_iou(w, h, anchor_w, anchor_h, anchor_area)
                    target[int(cy_index), int(cx_index), i] = np.array(
                        [iou, cx_offset, cy_offset, np.log(p_w), np.log(p_h),
                         *class_vector])

        return labels[13], labels[26], labels[52], img_data