Naming error in the formatting of the filepath (#277)

* Naming error in the formatting of the filepath: the data_from_dates function was unable to read data from the file because the filename was slightly wrong. Changed the prefix from 'datalog ' to 'datalog_', which is how the datalog file is named in ProCoDA.
* Fix other filepath errors in procoda_parser; increment version number.

Co-authored-by: HannahSi <hs649@cornell.edu>
AguaClara · Aug 25, 2020 · 41603d6 · 41603d6
1 parent 9ee3d1d
commit 41603d6
Show file tree

Hide file tree

Showing 10 changed files with 14 additions and 14 deletions.
diff --git a/aguaclara/research/procoda_parser.py b/aguaclara/research/procoda_parser.py
@@ -104,7 +104,7 @@ def get_data_by_time(path, columns, dates, start_time='00:00', end_time='23:59',
                      extension='.tsv', units=""):
     """Extract columns of data over one or more ProCoDA data files based on date
     and time. Valid only for files whose names are automatically generated by
-    date, i.e. of the form "datalog M-D-YYYY".
+    date, i.e. of the form "datalog_M-D-YYYY".
 
     Note: Column 0 is time. The first data column is column 1.
 
@@ -207,7 +207,7 @@ def data_from_dates(path, dates, extension):
 
     data = []
     for d in dates:
-        filepath = os.path.join(path, 'datalog ' + d + extension)
+        filepath = os.path.join(path, 'datalog_' + d + extension)
         data.append(remove_notes(pd.read_csv(filepath, delimiter='\t')))
 
     return data
@@ -282,8 +282,8 @@ def get_data_by_state(path, dates, state, column, extension=".tsv"):
         dates = [dates]
 
     for d in dates:
-        state_file = path + "statelog " + d + extension
-        data_file = path + "datalog " + d + extension
+        state_file = path + "statelog_" + d + extension
+        data_file = path + "datalog_" + d + extension
 
         states = pd.read_csv(state_file, delimiter='\t')
         data = pd.read_csv(data_file, delimiter='\t')
@@ -455,8 +455,8 @@ def read_state(dates, state, column, units="", path="", extension=".tsv"):
         dates = [dates]
 
     for d in dates:
-        state_file = path + "statelog " + d + extension
-        data_file = path + "datalog " + d + extension
+        state_file = path + "statelog_" + d + extension
+        data_file = path + "datalog_" + d + extension
 
         states = pd.read_csv(state_file, delimiter='\t')
         data = pd.read_csv(data_file, delimiter='\t')
@@ -557,8 +557,8 @@ def average_state(dates, state, column, units="", path="", extension=".tsv"):
         dates = [dates]
 
     for d in dates:
-        state_file = path + "statelog " + d + extension
-        data_file = path + "datalog " + d + extension
+        state_file = path + "statelog_" + d + extension
+        data_file = path + "datalog_" + d + extension
 
         states = pd.read_csv(state_file, delimiter='\t')
         data = pd.read_csv(data_file, delimiter='\t')
@@ -671,8 +671,8 @@ def avg_with_units(lst):
         dates = [dates]
 
     for d in dates:
-        state_file = path + "statelog " + d + extension
-        data_file = path + "datalog " + d + extension
+        state_file = path + "statelog_" + d + extension
+        data_file = path + "datalog_" + d + extension
 
         states = pd.read_csv(state_file, delimiter='\t')
         data = pd.read_csv(data_file, delimiter='\t')

diff --git a/setup.py b/setup.py
@@ -2,7 +2,7 @@
 
 setup(
     name = 'aguaclara',
-    version = '0.2.3',
+    version = '0.2.4',
     description = (
         'An open-source Python package for designing and performing research '
         'on AguaClara water treatment plants.'

diff --git a/tests/research/data/datalog 6-14-2018.xls → tests/research/data/datalog_6-14-2018.xls b/tests/research/data/datalog 6-14-2018.xls → tests/research/data/datalog_6-14-2018.xls
diff --git a/tests/research/data/datalog 6-15-2018.xls → tests/research/data/datalog_6-15-2018.xls b/tests/research/data/datalog 6-15-2018.xls → tests/research/data/datalog_6-15-2018.xls
diff --git a/tests/research/data/datalog 6-19-2013.xls → tests/research/data/datalog_6-19-2013.xls b/tests/research/data/datalog 6-19-2013.xls → tests/research/data/datalog_6-19-2013.xls
diff --git a/tests/research/data/datalog 6-20-2013.xls → tests/research/data/datalog_6-20-2013.xls b/tests/research/data/datalog 6-20-2013.xls → tests/research/data/datalog_6-20-2013.xls
diff --git a/tests/research/data/statelog 6-14-2018.xls → tests/research/data/statelog_6-14-2018.xls b/tests/research/data/statelog 6-14-2018.xls → tests/research/data/statelog_6-14-2018.xls
diff --git a/tests/research/data/statelog 6-19-2013.xls → tests/research/data/statelog_6-19-2013.xls b/tests/research/data/statelog 6-19-2013.xls → tests/research/data/statelog_6-19-2013.xls
diff --git a/tests/research/data/statelog 6-20-2013.xls → tests/research/data/statelog_6-20-2013.xls b/tests/research/data/statelog 6-20-2013.xls → tests/research/data/statelog_6-20-2013.xls
diff --git a/tests/research/test_ProCoDA_Parser.py b/tests/research/test_ProCoDA_Parser.py
@@ -13,11 +13,11 @@ def test_get_data_by_time(self):
         '''
         path = os.path.join(os.path.dirname(__file__), '.', 'data')
 
-        data_day1 = pd.read_csv(path + '/datalog 6-14-2018.xls', delimiter='\t')
+        data_day1 = pd.read_csv(path + '/datalog_6-14-2018.xls', delimiter='\t')
         data_day1 = np.round([pd.to_numeric(data_day1.iloc[:, 0]), pd.to_numeric(data_day1.iloc[:, 4])], 5)
         data_day1 = [data_day1[0].tolist(), data_day1[1].tolist()]
 
-        data_day2 = pd.read_csv(path + '/datalog 6-15-2018.xls', delimiter='\t')
+        data_day2 = pd.read_csv(path + '/datalog_6-15-2018.xls', delimiter='\t')
         data_day2 = np.round([pd.to_numeric(data_day2.iloc[:, 0]), pd.to_numeric(data_day2.iloc[:, 4])], 5)
         data_day2 = [data_day2[0].tolist(), data_day2[1].tolist()]
         data_day2[0][0] = 0  # to remove scientific notation "e-"
@@ -98,7 +98,7 @@ def test_get_data_by_state(self):
 
         output = get_data_by_state(path, dates=["6-19-2013"], state=1, column=1, extension=".xls")  # , "6-20-2013"
 
-        datafile = pd.read_csv(path + "/datalog 6-19-2013.xls", delimiter='\t')
+        datafile = pd.read_csv(path + "/datalog_6-19-2013.xls", delimiter='\t')
         time_and_data1 = np.array([pd.to_numeric(datafile.iloc[:, 0]),
                                    np.round(pd.to_numeric(datafile.iloc[:, 1]), 5)])
         start_time = time_and_data1[0, 0]