-
Notifications
You must be signed in to change notification settings - Fork 2
/
data_readin.py
73 lines (59 loc) · 2.3 KB
/
data_readin.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
#Import the necessary libraries and functions
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import re
import os
import glob
from matplotlib import rcParams
from pandas import Series, DataFrame
import csv
import datetime as datetime
from datetime import datetime
import warnings
from tkinter import filedialog
from tkinter import *
#Installs a global filter that silences every warning issued after this point
#(no category or module restriction is given, so it applies process-wide)
warnings.filterwarnings('ignore')
#Reads in data from files, appends them into a single dataframe
def data_read_in(result):
    '''Read every file named in ``result`` and return one combined dataframe.

    Each file name is expected to look like ``<sensor>_<suffix>`` with exactly
    one underscore. The sensor part becomes the ``site`` column; the suffix,
    minus its first 7 and last 4 characters, becomes the ``type`` column.
    File names are opened as given, so make sure you are in the correct
    directory before running this function.

    Files that cannot be parsed (bad name, missing file, unreadable CSV) are
    skipped with a logged warning instead of aborting the whole run.

    Side effect: the combined, unconverted observations are written to
    ``full_dataframe.csv`` in the current working directory.

    Parameters:
        result: iterable of file names to read.

    Returns:
        DataFrame with columns ``datetime``, ``temp``, ``light_intensity``,
        ``source``, ``site``, ``type`` plus the derived ``Year``, ``Month``,
        ``Day``, ``Week`` (ISO week number), ``Time``, ``Hour``, ``Minute``.

    Raises:
        ValueError: if none of the files could be read, or if a value cannot
            be converted by ``pd.to_datetime`` / ``pd.to_numeric``.
    '''
    #Local import: logging is not among the imports at the top of the file
    import logging

    log = logging.getLogger(__name__)
    frames = []
    for file_name in result:
        try:
            #Splits the file name so the sensor name and type can be separated
            sensor, suffix = file_name.split('_')
            #Subsets the important info from the name
            sensor_type = suffix[7:-4]
            #header=2 with explicit names: rows 0-1 are skipped and row 2 is
            #consumed as the header line that the names below replace
            df = pd.read_csv(file_name, header=2, usecols=[1, 2, 3],
                             names=['datetime', 'temp', 'light_intensity'])
        except Exception as err:
            #Best-effort: one bad file must not stop the others, but say so
            log.warning('Skipping %r: %s', file_name, err)
            continue
        #Create new columns that can be referenced later
        df['source'] = file_name
        df['site'] = sensor
        df['type'] = sensor_type
        frames.append(df)

    if not frames:
        raise ValueError('No objects to concatenate: none of the given '
                         'files could be read')

    #Assemble all dataframes into one master file
    x = pd.concat(frames)
    #Export all the observations to a csv (before any type conversion)
    x.to_csv('full_dataframe.csv')

    #Convert column types and separate datetime features so they can be used later
    x['datetime'] = pd.to_datetime(x['datetime'])
    x['temp'] = pd.to_numeric(x['temp'])
    x['light_intensity'] = pd.to_numeric(x['light_intensity'])

    #Build the index once; its attributes are assigned positionally, which
    #matters because the concatenated row index contains duplicates
    stamps = pd.DatetimeIndex(x['datetime'])
    x['Year'] = stamps.year
    x['Month'] = stamps.month
    x['Day'] = stamps.day
    #DatetimeIndex.week was removed in pandas 2.0; isocalendar() is its
    #replacement. Convert to a plain array so no index alignment happens.
    iso_week = stamps.isocalendar()['week']
    if iso_week.isna().any():
        x['Week'] = iso_week.to_numpy(dtype='float64', na_value=float('nan'))
    else:
        x['Week'] = iso_week.to_numpy(dtype='int64')
    x['Time'] = stamps.time
    x['Hour'] = stamps.hour
    x['Minute'] = stamps.minute
    return x