Skip to content

Commit

Permalink
fixed image augmentation step
Browse files Browse the repository at this point in the history
  • Loading branch information
bwwjohnson committed Nov 19, 2022
1 parent 1155a35 commit 4664679
Showing 1 changed file with 149 additions and 0 deletions.
149 changes: 149 additions & 0 deletions image_aug.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
# -*- coding: utf-8 -*-
"""image_augmentation_ben_pc.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1iHvkV1IOUOCNQH2MeZLeq6bX4--PZkXA
"""



# Commented out IPython magic to ensure Python compatibility.
# importing all the required libraries
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import skimage.io as io
from skimage.transform import rotate, AffineTransform, warp
import os
import pandas as pd
import matplotlib.pyplot as plt
# %matplotlib inline

# --- configuration & data loading ---------------------------------------
path = os.getcwd()  # working directory; train.csv and the image folders live here

# Load the training metadata and derive helper columns.
df = pd.read_csv(path + '/train.csv')
# Raw string so '\d' is a regex escape, not a (deprecated) Python string escape.
df['File_ID'] = df.example_path.str.extract(r'(\d+)')
df['version_no'] = np.zeros([len(df)])  # per-image augmentation counter, starts at 0


def _subset_by_label(frame, lbl):
    """Return a copy of *frame* holding only rows with ``label == lbl``.

    ``reset_index`` deliberately keeps the old index as an ``index`` column,
    matching what downstream code expects when it slices rows positionally.
    """
    sub = frame[frame['label'] == lbl].copy(deep=True)
    sub.reset_index(inplace=True)
    return sub


# One sub-frame per scenario label (0, 1, 2).
df_0 = _subset_by_label(df, 0)
df_1 = _subset_by_label(df, 1)
df_2 = _subset_by_label(df, 2)



def percentage_matcher(n_iter,df,df_0,df_1,df_2,path,outpath):
    '''Balance the class distribution by augmenting the minority scenario.

    Each iteration finds the scenario with the fewest images, samples one of
    its images, and writes five augmented copies (90/180/270 degree rotations
    plus left-right and up-down flips) to *outpath*, appending a metadata row
    for each copy to *df*.

    input n_iter: number of iterations for the for loop (each adds 5 images)
    input df: dataframe of all training data
    input df_0: dataframe of scenario 0
    input df_1: dataframe of scenario 1
    input df_2: dataframe of scenario 2
    input path: folder containing the source images (<path>/<File_ID>.png)
    input outpath: folder the augmented images are written to
    return lengths: the image count of each scenario after balancing
    return percentages: the fraction of events in each scenario
    return df: dataframe of augmented training data'''

    lengths = np.array([len(df_0), len(df_1), len(df_2)])  # images per scenario
    percentages = lengths / np.sum(lengths)
    frames = [df_0, df_1, df_2]  # index i holds the scenario-i sub-frame

    def _append_augmented(sample, image, df):
        '''Save *image* under the next version number of *sample*; append its row to df.'''
        sample[0][-1] += 1  # bump version_no in the sampled row copy
        # NOTE(review): the version counter lives only in this local copy; if the
        # same image is re-sampled in a later iteration the counter restarts and
        # earlier augmented files get overwritten -- TODO fix by writing the
        # counter back into the scenario frame.
        row = sample[0][1:]  # drop the 'index' column added by reset_index
        df_ev = pd.DataFrame([row], columns=['label','latitude','longitude',
                                             'year','example_path','File_ID',
                                             'version_no'])
        # FIXED: the flip branches of the original wrote 'rain_test_data' (typo),
        # leaving inconsistent paths in the metadata; all variants now use the
        # same prefix as the rotation branch.
        df_ev['example_path'] = 'train_test_data/train/{p}_{q}.png'.format(
            p=df_ev['File_ID'].iloc[-1], q=int(df_ev['version_no'].iloc[-1]))
        df = pd.concat([df, df_ev], ignore_index=True)
        # FIXED: dropped the unused k= keyword the original passed to format().
        io.imsave('{h}/{p}_{q}.png'.format(h=outpath,
                                           p=df['File_ID'].iloc[-1],
                                           q=int(df['version_no'].iloc[-1])),
                  arr=image)
        return df

    for _ in range(n_iter):
        # FIXED: select a single minority scenario with argmin; the original
        # boolean mask (lengths == min) selected several scenarios on a tie and
        # the subsequent `folders[boolarr] == ...` comparison then raised.
        idx = int(np.argmin(lengths))
        lengths[idx] += 5  # 3 rotations + 2 flips are added below
        percentages = lengths / np.sum(lengths)

        sample_img_no = np.array(frames[idx].sample())  # one random row, as ndarray
        # sample_img_no[0][-2] is File_ID, [0][-1] is version_no (last two columns).
        image_path = '{h}/{p}.png'.format(h=path, p=sample_img_no[0][-2])
        img = io.imread(image_path)
        # (removed the per-iteration io.imshow call: it drew a figure for every
        # sampled image inside a batch loop without ever calling plt.show)

        for angle in (90, 180, 270):
            df = _append_augmented(sample_img_no,
                                   rotate(img, angle=angle, mode='wrap'), df)
        df = _append_augmented(sample_img_no, np.fliplr(img), df)
        df = _append_augmented(sample_img_no, np.flipud(img), df)

    return lengths, percentages, df

# Run the balancing pass: read source images from train_test_data/train and
# write the augmented copies (and their metadata rows) out.
lengths, percentages, df = percentage_matcher(
    30, df, df_0, df_1, df_2,
    path + '/train_test_data' + '/train',
    path + '/figs')

# Persist the extended metadata for the downstream training step.
df.to_csv(path + '/new_train.csv')

print(len(df))

# Visualise the (now more balanced) class distribution.
class_counts = df.groupby('label').size()
plt.figure(figsize=(8, 8))
# FIXED: the original passed `labels=label` where `label` was never defined
# (NameError); the pie slice labels are the groupby keys themselves.
plt.pie(class_counts, labels=class_counts.index,
        autopct='%1.1f%%', shadow=True, startangle=90)
plt.show()

























0 comments on commit 4664679

Please sign in to comment.