-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
1155a35
commit 4664679
Showing
1 changed file
with
149 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,149 @@ | ||
# -*- coding: utf-8 -*- | ||
"""image_augmentation_ben_pc.ipynb | ||
Automatically generated by Colaboratory. | ||
Original file is located at | ||
https://colab.research.google.com/drive/1iHvkV1IOUOCNQH2MeZLeq6bX4--PZkXA | ||
""" | ||
|
||
|
||
|
||
# Commented out IPython magic to ensure Python compatibility. | ||
# importing all the required libraries | ||
import warnings | ||
warnings.filterwarnings('ignore') | ||
import numpy as np | ||
import skimage.io as io | ||
from skimage.transform import rotate, AffineTransform, warp | ||
import os | ||
import pandas as pd | ||
import matplotlib.pyplot as plt | ||
# %matplotlib inline | ||
|
||
# Work relative to the current directory: train.csv is read from here and
# all outputs are written below it.
path = os.getcwd()
df = pd.read_csv(path + '/train.csv')

# File_ID is the first run of digits found in example_path. The pattern is a
# raw string so that "\d" is not treated as an (invalid) string escape.
df['File_ID'] = df.example_path.str.extract(r'(\d+)', expand=False)
# version_no counts augmented copies; originals start at 0.
df['version_no'] = np.zeros([len(df)])

# One frame per label. reset_index() deliberately keeps the old index as a
# leading 'index' column: percentage_matcher slices that column off with [1:].
df_0 = df[df['label'] == 0].reset_index()
df_1 = df[df['label'] == 1].reset_index()
df_2 = df[df['label'] == 2].reset_index()
|
||
|
||
|
||
def percentage_matcher(n_iter, df, df_0, df_1, df_2, path, outpath):
    '''Even out the class balance by augmenting images of the rarest class.

    On every iteration the scenario with the fewest rows is chosen, one of its
    rows is sampled at random, and five augmented copies of that image are
    written (rotations by 90, 180 and 270 degrees, a left-right flip and an
    up-down flip). One row per augmented image is appended to ``df``.

    input n_iter: number of iterations of the balancing loop
    input df: dataframe of all training data
    input df_0: dataframe of scenario 0 (with a leading 'index' column)
    input df_1: dataframe of scenario 1 (with a leading 'index' column)
    input df_2: dataframe of scenario 2 (with a leading 'index' column)
    input path: folder holding the source images, read as <path>/<File_ID>.png
    input outpath: folder the augmented images are written to, as
        <outpath>/<File_ID>_<version_no>.png
    return lengths: number of rows in each scenario after augmentation
    return percentages: fraction (0-1) of rows in each scenario
    return df: dataframe of augmented training data
    '''
    scenario_frames = (df_0, df_1, df_2)
    row_columns = ['label', 'latitude', 'longitude', 'year',
                   'example_path', 'File_ID', 'version_no']
    rotations = (90, 180, 270)

    # Starting number of rows per scenario and their share of the total.
    lengths = np.array([len(frame) for frame in scenario_frames])
    percentages = lengths / np.sum(lengths)

    for _ in range(n_iter):
        # argmin returns exactly one index (the first on ties). Comparing
        # against the minimum with a boolean mask selects several scenarios
        # when they tie, which cannot be used in an `if` test.
        rarest = int(np.argmin(lengths))

        # Row layout: [index, label, ..., example_path, File_ID, version_no]
        sampled = np.array(scenario_frames[rarest].sample())[0]
        file_id = sampled[-2]

        img = io.imread('{h}/{p}.png'.format(h=path, p=file_id))
        io.imshow(img)

        augmented = [rotate(img, angle=angle, mode='wrap') for angle in rotations]
        augmented.append(np.fliplr(img))
        augmented.append(np.flipud(img))

        for new_img in augmented:
            # NOTE(review): the counter restarts from the sampled row's
            # version_no, so sampling the same image twice reuses the same
            # file names and appends duplicate rows - confirm this is intended.
            sampled[-1] += 1
            version = int(sampled[-1])

            # Drop the leading 'index' column carried over from reset_index().
            df_ev = pd.DataFrame([sampled[1:]], columns=row_columns)
            df_ev['example_path'] = 'train_test_data/train/{p}_{q}.png'.format(
                p=file_id, q=version)
            df = pd.concat([df, df_ev], ignore_index=True)
            io.imsave('{h}/{p}_{q}.png'.format(h=outpath, p=file_id, q=version),
                      arr=new_img)

        lengths[rarest] += len(augmented)
        percentages = lengths / np.sum(lengths)

    return lengths, percentages, df
|
||
# Balance the classes: originals are read from train_test_data/train and the
# augmented copies are written to figs.
figs_dir = path + '/figs'
os.makedirs(figs_dir, exist_ok=True)  # make sure the output folder exists before saving
lengths, percentages, df = percentage_matcher(
    30, df, df_0, df_1, df_2, path + '/train_test_data' + '/train', figs_dir)

# Persist the augmented table next to the original train.csv.
df.to_csv(path + '/new_train.csv')

print(len(df))

# visualise the class balance after augmentation
class_counts = df.groupby('label').size()
plt.figure(figsize=(8, 8))
# Slice labels come from the grouped index, so each slice is named after
# the label value it counts.
plt.pie(class_counts, labels=class_counts.index, autopct='%1.1f%%',
        shadow=True, startangle=90)
plt.show()
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|