From 0a774df1e2cf469180eda1df3f9467d5323a3e64 Mon Sep 17 00:00:00 2001
From: Arianna Curillo
Date: Tue, 10 Nov 2020 11:38:48 -0500
Subject: [PATCH] Asc327/procoda parser.py (#283)

* test case for test_column_of_data
* test case for test_day_fraction
* added test case for test_time_column_index
* comment out other method that tests time_column_index
* fixed indentation
* added test case for test_data_from_dates
* added test cases and clarified specs for column_start_to_end
* uncommented test_get_data_by_time
* added testing file; changed column_start_to_end test case
* test for plot_columns and removed unnecessary parameter
* added test cases for plot_column
* added valueerror test case
* asserted expression in plot_columns
* added test cases for iplot_columns
* removed parameter for figure in plot_columns and iplot_columns
* testing to see why CI report failed
* column_of_time allows any unit of time return type; test case for it
* added imports to hopefully fix failing build
* testing codecov
* undid testing codecov comment
* attempted to test on file statelog 11-5-19 downloaded into local drive
* get_data_by_state accepts github.com URLS
* implements and tests the intersection function
* optimizing procoda_parser functions
* simplified fxns, added units to get_data_by_time
* Fix documentation on intersect function
* added elapsed time option to get_data_by_time
* Fixed the test_intersect function
* Updated version

Co-authored-by: Alice Zhao
Co-authored-by: HannahSi
---
 aguaclara/research/procoda_parser.py       | 486 ++++++---------
 setup.py                                   |   2 +-
 tests/research/data/datalog_11-5-2019.csv  |  67 +++
 tests/research/data/datalog_11-5-2019.tsv  |   1 +
 tests/research/data/datalog_6-16-2018.xls  |   3 +
 tests/research/data/statelog_11-5-2019.csv |  33 +
 tests/research/data/statelog_11-5-2019.tsv |   1 +
 tests/research/test_ProCoDA_Parser.py      | 661 +++++++++++++++------
 8 files changed, 749 insertions(+), 505 deletions(-)
 create mode 100644 tests/research/data/datalog_11-5-2019.csv
 create mode 100644 tests/research/data/datalog_11-5-2019.tsv
 create mode 100644 tests/research/data/datalog_6-16-2018.xls
 create mode 100644 tests/research/data/statelog_11-5-2019.csv
 create mode 100644 tests/research/data/statelog_11-5-2019.tsv

diff --git a/aguaclara/research/procoda_parser.py b/aguaclara/research/procoda_parser.py
index ae04f0bd..66404f90 100644
--- a/aguaclara/research/procoda_parser.py
+++ b/aguaclara/research/procoda_parser.py
@@ -43,7 +43,7 @@ def column_of_data(path, start, column, end=None, units=""):
     return np.array(num_data) * u(units)
 
 
-def column_of_time(path, start, end=None):
+def column_of_time(path, start, end=None, units="day"):
     """This function extracts the column of times as elapsed times from a ProCoDA
     data file.
 
     :param path: The file path of the ProCoDA data file.
     :type path: string
     :param start: Index of first row of data to extract from the data file
     :type start: int
     :param end: Index of last row of data to extract from the data. Defaults to last row
     :type end: int
+    :param units: The unit of time in which to return the elapsed times. Defaults to 'day'.
+    :type units: string, optional
 
     :return: Experimental times starting at 0
-    :rtype: numpy.ndarray in units of days
+    :rtype: numpy.ndarray in the time units given by the units parameter (days by default)
 
     :Examples:
 
@@ -70,7 +72,81 @@ def column_of_time(path, start, end=None):
     num_day_times = pd.to_numeric(day_times[is_numeric])
     elapsed_times = num_day_times - start_time
 
-    return np.array(elapsed_times) * u.day
+    return (np.array(elapsed_times) * u.day).to(u(units))
+
+
+def plot_columns(path, columns, x_axis=None):
+    """Plot columns of data, located by labels, in the given data file.
+
+    :param path: The file path of the ProCoDA data file
+    :type path: string
+    :param columns: A single column label or list of column labels
+    :type columns: string or string list
+    :param x_axis: The label of the x-axis column (defaults to None)
+    :type x_axis: string, optional
+
+    :return: A list of Line2D objects representing the plotted data
+    :rtype: matplotlib.lines.Line2D list
+    """
+    df = pd.read_csv(path, delimiter='\t')
+    df = remove_notes(df)
+
+    if isinstance(columns, str):
+        y = pd.to_numeric(df.loc[:, columns])
+        if x_axis is None:
+            plt.plot(y)
+        else:
+            x = pd.to_numeric(df.loc[:, x_axis])
+            plt.plot(x, y)
+
+    elif isinstance(columns, list):
+        for c in columns:
+            y = pd.to_numeric(df.loc[:, c])
+            if x_axis is None:
+                plt.plot(y)
+            else:
+                x = pd.to_numeric(df.loc[:, x_axis])
+                plt.plot(x, y)
+    else:
+        raise ValueError('columns must be a string or list of strings')
+
+
+def iplot_columns(path, columns, x_axis=None):
+    """Plot columns of data, located by indexes, in the given data file.
+
+    :param path: The file path of the ProCoDA data file
+    :type path: string
+    :param columns: A single column index or list of column indexes
+    :type columns: int or int list
+    :param x_axis: The index of the x-axis column (defaults to None)
+    :type x_axis: int, optional
+
+    :return: a list of Line2D objects representing the plotted data
+    :rtype: matplotlib.lines.Line2D list
+    """
+    df = pd.read_csv(path, delimiter='\t')
+    df = remove_notes(df)
+
+    if isinstance(columns, int):
+        y = pd.to_numeric(df.iloc[:, columns])
+        if x_axis is None:
+            plt.plot(y)
+        else:
+            x = pd.to_numeric(df.iloc[:, x_axis])
+            plt.plot(x, y)
+
+    elif isinstance(columns, list):
+        for c in columns:
+            y = pd.to_numeric(df.iloc[:, c])
+            if x_axis is None:
+                plt.plot(y)
+            else:
+                x = pd.to_numeric(df.iloc[:, x_axis])
+                plt.plot(x, y)
+    else:
+        raise ValueError('columns must be an int or a list of ints')
 
 
 def notes(path):
@@ -101,12 +177,13 @@ def remove_notes(data):
 
 
 def get_data_by_time(path, columns, dates, start_time='00:00', end_time='23:59',
-                     extension='.tsv', units=""):
+                     extension='.tsv', units='', elapsed=False):
     """Extract columns of data over one or more ProCoDA data files based on date
     and time. Valid only for files whose names are automatically generated by
     date, i.e. of the form "datalog_M-D-YYYY".
 
-    Note: Column 0 is time. The first data column is column 1.
+    Note: Column 0 is time. The first data column is column 1. Results for the
+    time column are adjusted for multi-day experiments.
 
     :param path: The path to the folder containing the ProCoDA data file(s)
     :type path: string
@@ -120,8 +197,10 @@ def get_data_by_time(path, columns, dates, start_time='00:00', end_time='23:59',
     :type end_time: string, optional
     :param extension: File extension of the data file(s).
                       Defaults to '.tsv'
     :type extension: string, optional
-    :param units: The units you want to apply to the data, e.g. 'mg/L'. Defaults to "" (dimensionless).
+    :param units: A single unit or list of units to apply to each column, e.g. 'mg/L' or ['hr', 'mg/L']. Defaults to '' (dimensionless).
     :type units: string or string list, optional
+    :param elapsed: If True, results for the time column are given as elapsed time
+    :type elapsed: boolean
 
     :return: the single column of data or a list of the columns of data (in the order of the indexes given in the columns variable)
     :rtype: 1D or 2D float list
 
     :Examples:
 
     data = get_data_by_time(path='/Users/.../ProCoDA Data/', columns=[0,4], dates='6-14-2018', start_time='12:20', end_time='23:59')
     data = get_data_by_time(path='/Users/.../ProCoDA Data/', columns=[0,3,4], dates='6-14-2018')
     """
-    data = data_from_dates(path, dates, extension)
+    # convert a github.com URL to the raw.githubusercontent.com URL that serves the file contents
+    if 'github.com' in path:
+        path = path.replace('github.com', 'raw.githubusercontent.com')
+        path = path.replace('blob/', '')
+        path = path.replace('tree/', '')
+
+    data = data_from_dates(path, dates, extension)  # combine data from each date
 
     first_time_column = pd.to_numeric(data[0].iloc[:, 0])
     start = max(day_fraction(start_time), first_time_column[0])
-    start_idx = time_column_index(start, first_time_column)
-    end_idx = time_column_index(day_fraction(end_time),
-                                pd.to_numeric(data[-1].iloc[:, 0])) + 1
+    start_idx = (first_time_column >= start).idxmax()
+    end_idx = (pd.to_numeric(data[-1].iloc[:, 0]) >=
+               day_fraction(end_time)).idxmax() + 1
 
     if isinstance(columns, int):
-        return column_start_to_end(data, columns, start_idx, end_idx)
-    else:
+        if columns == 0 and elapsed:
+            col = column_start_to_end(data, columns, start_idx, end_idx)
+            result = list(np.subtract(col, start))*u(units)
+        else:
+            result = column_start_to_end(data, columns, start_idx, end_idx)*u(units)
+    else:  # columns is a list
+        if units == '':
+            units = ['']*len(columns)
         result = []
+        i = 0
         for c in columns:
-            result.append(column_start_to_end(data, c, start_idx, end_idx))
+            if c == 0 and elapsed:
+                col = column_start_to_end(data, c, start_idx, end_idx)
+                result.append(list(np.subtract(col, start))*u(units[i]))
+            else:
+                result.append(column_start_to_end(data, c, start_idx, end_idx)*u(units[i]))
+            i += 1
+
     return result
 
 
@@ -172,22 +270,6 @@ def day_fraction(time):
     return hour/24 + minute/1440
 
 
-def time_column_index(time, time_column):
-    """Return the index of the lowest time in the column of times that is greater
-    than or equal to the given time.
-
-    :param time: the time to index from the column of time; a day fraction
-    :type time: float
-    :param time_column: a list of times in day fractions, must be increasing and equally spaced
-    :type time_column: float list
-
-    :return: approximate index of the time from the column of times
-    :rtype: int
-    """
-    interval = time_column[1]-time_column[0]
-    return int(round((time - time_column[0])/interval + .5))
-
-
 def data_from_dates(path, dates, extension):
     """Return a list of DataFrames representing the ProCoDA data files stored in
     the given path and recorded on the given dates.
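For reference, the github.com rewrite introduced above (and repeated in get_data_by_state below) simply maps a repository page URL onto the raw.githubusercontent.com URL that serves the file contents, so pandas can read the data files directly. A minimal standalone sketch of the same transformation; the helper name to_raw_github_url is hypothetical and used only for illustration:

    # Sketch of the URL rewrite applied by get_data_by_time and
    # get_data_by_state; to_raw_github_url is a hypothetical helper name.
    def to_raw_github_url(path):
        if 'github.com' in path:
            path = path.replace('github.com', 'raw.githubusercontent.com')
            path = path.replace('blob/', '')
            path = path.replace('tree/', '')
        return path

    # Using the URL that appears in the new tests:
    # 'https://github.com/monroews/playing/blob/master/ProCoDA_data'
    # becomes
    # 'https://raw.githubusercontent.com/monroews/playing/master/ProCoDA_data'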
@@ -222,10 +304,10 @@ def column_start_to_end(data, column, start_idx, end_idx):
     :type data: Pandas.DataFrame list
     :param column: a column index
     :type column: int
-    :param start_idx: the index of the starting row
-    :type start_idx: int
-    :param start_idx: the index of the ending row
+    :param start_idx: the index of the starting row of the first DataFrame
     :type start_idx: int
+    :param end_idx: the index of the ending row of the last DataFrame (exclusive)
+    :type end_idx: int
 
     :return: a list of data from the given column
     :rtype: float list
@@ -238,6 +320,8 @@ def column_start_to_end(data, column, start_idx, end_idx):
             data[i].iloc[0, 0] = 0
             result += list(pd.to_numeric(data[i].iloc[:, column]) +
                            (i if column == 0 else 0))
+            # assuming DataFrames are for consecutive days, add number of
+            # DataFrame if dealing with the time column (column 0)
 
     data[-1].iloc[0, 0] = 0
     result += list(pd.to_numeric(data[-1].iloc[:end_idx, column]) +
                    (len(data)-1 if column == 0 else 0))
@@ -249,7 +333,8 @@ def get_data_by_state(path, dates, state, column, extension=".tsv"):
     """Reads a ProCoDA file and extracts the time and data column for each
     iteration of the given state.
 
-    Note: column 0 is time, the first data column is column 1.
+    Note: column 0 is time, the first data column is column 1. Results for the
+    time column are given in elapsed time.
 
     :param path: The path to the folder containing the ProCoDA data file(s), defaults to the current directory
     :type path: string
@@ -271,6 +356,12 @@ def get_data_by_state(path, dates, state, column, extension=".tsv"):
     data = get_data_by_state(path='/Users/.../ProCoDA Data/', dates=["6-19-2013", "6-20-2013"], state=1, column=28)
     """
+    # convert a github.com URL to the raw.githubusercontent.com URL that serves the file contents
+    if 'github.com' in path:
+        path = path.replace('github.com', 'raw.githubusercontent.com')
+        path = path.replace('blob/', '')
+        path = path.replace('tree/', '')
+
     data_agg = []
     day = 0
     first_day = True
@@ -302,25 +393,20 @@ def get_data_by_state(path, dates, state, column, extension=".tsv"):
             state_end = np.insert(state_end, 0, states[0, 0])
 
         if state_start_idx[-1]:
-            np.append(state_end, data[0, -1])
+            state_end = np.append(state_end, data[-1, 0])
 
         # get the corresponding indices in the data array
         data_start = []
         data_end = []
         for i in range(np.size(state_start)):
-            add_start = True
-            for j in range(np.size(data[:, 0])):
-                if (data[j, 0] > state_start[i]) and add_start:
-                    data_start.append(j)
-                    add_start = False
-                if data[j, 0] > state_end[i]:
-                    data_end.append(j-1)
-                    break
+            data_start.append((data[:, 0] > state_start[i]).argmax())
+            data_end.append((data[:, 0] > state_end[i]).argmax()-1)
+        if np.size(data_end) < np.size(data_start):
+            data_end = np.append(data_end, -1)
 
         if first_day:
             start_time = data[0, 0]
-
         # extract data at those times
         for i in range(np.size(data_start)):
             t = data[data_start[i]:data_end[i], 0] + day - start_time
             if isinstance(column, int):
@@ -342,82 +428,6 @@ def get_data_by_state(path, dates, state, column, extension=".tsv"):
     return data_agg
 
 
-def plot_columns(path, columns, x_axis=None):
-    """Plot columns of data, located by labels, in the given data file.
-
-    :param path: The file path of the ProCoDA data file
-    :type path: string
-    :param columns: A single column label or list of column labels
-    :type columns: string or string list
-    :param x_axis: The label of the x-axis column (defaults to None)
-    :type x_axis: string, optional
-    :param sep: The separator or delimiter, of the data file. Use ',' for CSV's, '\t' for TSV's.
- :type sep: string - - :return: A list of Line2D objects representing the plotted data - :rtype: matplotlib.lines.Line2D list - """ - df = pd.read_csv(path, delimiter='\t') - df = remove_notes(df) - - if isinstance(columns, str): - y = pd.to_numeric(df.loc[:, columns]) - if x_axis is None: - plt.plot(y) - else: - x = pd.to_numeric(df.loc[:, x_axis]) - plt.plot(x, y) - - elif isinstance(columns, list): - for c in columns: - y = pd.to_numeric(df.loc[:, c]) - if x_axis is None: - plt.plot(y) - else: - x = pd.to_numeric(df.loc[:, x_axis]) - plt.plot(x, y) - else: - raise ValueError('columns must be a string or list of strings') - - -def iplot_columns(path, columns, x_axis=None): - """Plot columns of data, located by indexes, in the given data file. - - :param path: The file path of the ProCoDA data file - :type path: string - :param columns: A single column index or list of column indexes - :type columns: int or int list - :param x_axis: The index of the x-axis column (defaults to None) - :type x_axis: int, optional - :param sep: The separator or delimiter, of the data file. Use ',' for CSV's, '\t' for TSV's. - :type sep: string - - :return: a list of Line2D objects representing the plotted data - :rtype: matplotlib.lines.Line2D list - """ - df = pd.read_csv(path, delimiter='\t') - df = remove_notes(df) - - if isinstance(columns, int): - y = pd.to_numeric(df.iloc[:, columns]) - if x_axis is None: - plt.plot(y) - else: - x = pd.to_numeric(df.iloc[:, x_axis]) - plt.plot(x, y) - - elif isinstance(columns, list): - for c in columns: - y = pd.to_numeric(df.iloc[:, c]) - if x_axis is None: - plt.plot(y) - else: - x = pd.to_numeric(df.iloc[:, x_axis]) - plt.plot(x, y) - else: - raise ValueError('columns must be an int or a list of ints') - - def read_state(dates, state, column, units="", path="", extension=".tsv"): """Reads a ProCoDA file and outputs the data column and time vector for each iteration of the given state. 
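The hunks below reduce read_state, average_state, and perform_function_on_state to thin wrappers around get_data_by_state. A brief, hypothetical usage sketch of the two most common wrappers; the dates, column, and units mirror the existing tests, and 'path/to/data/' is a placeholder:

    import aguaclara.research.procoda_parser as pp

    # One concatenated elapsed-time vector and data column covering every
    # iteration of state 1 across both days, with units attached.
    time, flow = pp.read_state(["6-19-2013", "6-20-2013"], 1, 28, "mL/s",
                               "path/to/data/", extension=".xls")

    # One average of the same column per iteration of state 1.
    avgs = pp.average_state(["6-19-2013", "6-20-2013"], 1, 28, "mL/s",
                            "path/to/data/", extension=".xls")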
@@ -446,72 +456,7 @@ def read_state(dates, state, column, units="", path="", extension=".tsv"): time, data = read_state(["6-19-2013", "6-20-2013"], 1, 28, "mL/s") """ - data_agg = [] - day = 0 - first_day = True - overnight = False - - if not isinstance(dates, list): - dates = [dates] - - for d in dates: - state_file = path + "statelog_" + d + extension - data_file = path + "datalog_" + d + extension - - states = pd.read_csv(state_file, delimiter='\t') - data = pd.read_csv(data_file, delimiter='\t') - - states = np.array(states) - data = np.array(data) - - # get the start and end times for the state - state_start_idx = states[:, 1] == state - state_start = states[state_start_idx, 0] - state_end_idx = np.append([False], state_start_idx[0:(np.size(state_start_idx)-1)]) - state_end = states[state_end_idx, 0] - - if overnight: - state_start = np.insert(state_start, 0, 0) - state_end = np.insert(state_end, 0, states[0, 0]) - - if state_start_idx[-1]: - state_end.append(data[0, -1]) - - # get the corresponding indices in the data array - data_start = [] - data_end = [] - for i in range(np.size(state_start)): - add_start = True - for j in range(np.size(data[:, 0])): - if (data[j, 0] > state_start[i]) and add_start: - data_start.append(j) - add_start = False - if (data[j, 0] > state_end[i]): - data_end.append(j-1) - break - - if first_day: - start_time = data[1, 0] - - # extract data at those times - for i in range(np.size(data_start)): - t = data[data_start[i]:data_end[i], 0] + day - start_time - if isinstance(column, int): - c = data[data_start[i]:data_end[i], column] - else: - c = data[column][data_start[i]:data_end[i]] - if overnight and i == 0: - data_agg = np.insert(data_agg[-1], np.size(data_agg[-1][:, 0]), - np.vstack((t, c)).T) - else: - data_agg.append(np.vstack((t, c)).T) - - day += 1 - if first_day: - first_day = False - if state_start_idx[-1]: - overnight = True - + data_agg = get_data_by_state(path, dates, state, column, extension) data_agg = np.vstack(data_agg) if units != "": return data_agg[:, 0]*u.day, data_agg[:, 1]*u(units) @@ -548,73 +493,11 @@ def average_state(dates, state, column, units="", path="", extension=".tsv"): data_avgs = average_state(["6-19-2013", "6-20-2013"], 1, 28, "mL/s") """ - data_agg = [] - day = 0 - first_day = True - overnight = False - - if not isinstance(dates, list): - dates = [dates] - - for d in dates: - state_file = path + "statelog_" + d + extension - data_file = path + "datalog_" + d + extension - - states = pd.read_csv(state_file, delimiter='\t') - data = pd.read_csv(data_file, delimiter='\t') - - states = np.array(states) - data = np.array(data) - - # get the start and end times for the state - state_start_idx = states[:, 1] == state - state_start = states[state_start_idx, 0] - state_end_idx = np.append([False], state_start_idx[0:(np.size(state_start_idx)-1)]) - state_end = states[state_end_idx, 0] - - if overnight: - state_start = np.insert(state_start, 0, 0) - state_end = np.insert(state_end, 0, states[0, 0]) - - if state_start_idx[-1]: - state_end.append(data[0, -1]) - - # get the corresponding indices in the data array - data_start = [] - data_end = [] - for i in range(np.size(state_start)): - add_start = True - for j in range(np.size(data[:, 0])): - if (data[j, 0] > state_start[i]) and add_start: - data_start.append(j) - add_start = False - if (data[j, 0] > state_end[i]): - data_end.append(j-1) - break - - if first_day: - start_time = data[1, 0] - - # extract data at those times - for i in range(np.size(data_start)): - if isinstance(column, 
int): - c = data[data_start[i]:data_end[i], column] - else: - c = data[column][data_start[i]:data_end[i]] - if overnight and i == 0: - data_agg = np.insert(data_agg[-1], np.size(data_agg[-1][:]), c) - else: - data_agg.append(c) - - day += 1 - if first_day: - first_day = False - if state_start_idx[-1]: - overnight = True + data_agg = get_data_by_state(path, dates, state, column, extension) averages = np.zeros(np.size(data_agg)) for i in range(np.size(data_agg)): - averages[i] = np.average(data_agg[i]) + averages[i] = np.average(data_agg[i][:,1]) if units != "": return averages*u(units) @@ -662,76 +545,14 @@ def avg_with_units(lst): data_avgs = perform_function_on_state(avg_with_units, ["6-19-2013", "6-20-2013"], 1, 28, "mL/s") """ - data_agg = [] - day = 0 - first_day = True - overnight = False - - if not isinstance(dates, list): - dates = [dates] - - for d in dates: - state_file = path + "statelog_" + d + extension - data_file = path + "datalog_" + d + extension - - states = pd.read_csv(state_file, delimiter='\t') - data = pd.read_csv(data_file, delimiter='\t') - - states = np.array(states) - data = np.array(data) - - # get the start and end times for the state - state_start_idx = states[:, 1] == state - state_start = states[state_start_idx, 0] - state_end_idx = np.append([False], state_start_idx[0:(np.size(state_start_idx)-1)]) - state_end = states[state_end_idx, 0] - - if overnight: - state_start = np.insert(state_start, 0, 0) - state_end = np.insert(state_end, 0, states[0, 0]) - - if state_start_idx[-1]: - state_end.append(data[0, -1]) - - # get the corresponding indices in the data array - data_start = [] - data_end = [] - for i in range(np.size(state_start)): - add_start = True - for j in range(np.size(data[:, 0])): - if (data[j, 0] > state_start[i]) and add_start: - data_start.append(j) - add_start = False - if (data[j, 0] > state_end[i]): - data_end.append(j-1) - break - - if first_day: - start_time = data[1, 0] - - # extract data at those times - for i in range(np.size(data_start)): - if isinstance(column, int): - c = data[data_start[i]:data_end[i], column] - else: - c = data[column][data_start[i]:data_end[i]] - if overnight and i == 0: - data_agg = np.insert(data_agg[-1], np.size(data_agg[-1][:]), c) - else: - data_agg.append(c) - - day += 1 - if first_day: - first_day = False - if state_start_idx[-1]: - overnight = True + data_agg = get_data_by_state(path, dates, state, column, extension) output = np.zeros(np.size(data_agg)) for i in range(np.size(data_agg)): if units != "": - output[i] = func(data_agg[i]*u(units)).magnitude + output[i] = func(data_agg[i][:,1]*u(units)).magnitude else: - output[i] = func(data_agg[i]) + output[i] = func(data_agg[i][:,1]) if units != "": return output*func(data_agg[i]*u(units)).units @@ -889,3 +710,40 @@ def write_calculations_to_csv(funcs, states, columns, path, headers, out_name, output.to_csv(out_name, sep='\t') return output + + +def intersect(x, y1, y2): + """Returns the intersections of two lines represented by a common set of x coordinates and + two sets of y coordinates as three numpy arrays: the x coordinates of the intersections, + the y coordinates of the intersections, and the indexes in x, y1, y2 immediately + after the intersections. 
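+    For example, with x = [1, 2, 3], y1 = [2, 6, 8], and y2 = [6, 2, 3] (the
+    first case in test_intersect below), the lines cross once between the
+    first two samples, giving x_points = [1.5], y_points = [4], and
+    crossings = [1].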
+ + :param x: common set of x coordinates for the two lines + :type x: numpy.ndarray + :param y1: the y coordinates of the first line + :type y1: numpy.ndarray + :param y2: the y coordinates of the second line + :type y2: numpy.ndarray + + :requires: x have no repeating values and is in ascending order + + :return: x_points-numpy.ndarray of the x coordinates where intersections occur + :return: y_points-numpy.ndarray of the y coordinates where intersections occur + :return: crossings-numpy.ndarray of the indexes after the intersections occur + """ + x_points = np.array([]) + y_points = np.array([]) + crossings = (np.argwhere(np.diff(np.sign(y1-y2)))+1).flatten() + + for c in crossings: + slope1 = (y1[c] - y1[c-1]) / (x[c] - x[c-1]) + slope2 = (y2[c] - y2[c-1]) / (x[c] - x[c-1]) + b1 = y1[c] - slope1 * x[c] + b2 = y2[c] - slope2 * x[c] + + x_points = np.append(x_points, (b2-b1)/(slope1-slope2)) + y_points = np.append(y_points, slope1*(b2-b1)/(slope1-slope2) + b1) + + return x_points, y_points, crossings + + diff --git a/setup.py b/setup.py index 094f1bf2..d690c87a 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ setup( name = 'aguaclara', - version = '0.2.5', + version = '0.2.6', description = ( 'An open-source Python package for designing and performing research ' 'on AguaClara water treatment plants.' diff --git a/tests/research/data/datalog_11-5-2019.csv b/tests/research/data/datalog_11-5-2019.csv new file mode 100644 index 00000000..4b4c3560 --- /dev/null +++ b/tests/research/data/datalog_11-5-2019.csv @@ -0,0 +1,67 @@ +Day fraction since midnight on 11/5/2019,7 kPa (cm),water flow (mL/s),coag flow (mL/s),max x + 1 (),max cycles (),cycles () +0.36840286,0,0.5067,0,6,12,0 +0.36846073,0,0.5067,0,6,12,0 +0.36854175,0,0.5067,0,6,12,0 +0.36855332,0,0.5067,0,6,12,0 +0.3685649,0,0.5067,0,6,12,1 +0.36857646,0,0.5067,0,6,12,1 +0.36858804,0,0.5067,0,6,12,1 +0.36859962,0,0.5067,0,6,12,1 +0.36861119,0,0.5067,0,6,12,1 +0.36862276,0,0.5067,0,6,12,1 +0.36863434,0,0.5067,0,6,12,1 +0.36864591,0,0.5067,0,6,12,1 +0.36865749,0,0.5067,0,6,12,1 +0.36866906,0,0.5067,0,6,12,1 +0.36868065,0,0.5067,0,6,12,1 +0.36869221,0,0.5067,0,6,12,1 +0.36870379,0,0.5067,0,6,12,1 +0.36871537,0,0.5067,0,6,12,1 +0.36872693,0,0.5067,0,6,12,1 +0.36873852,0,0.5067,0,6,12,1 +0.3687501,0,0.5067,0,6,12,1 +0.36876166,0,0.5067,0,6,12,1 +0.36877323,0,0.5067,0,6,12,1 +0.36878481,0,0.5067,0,6,12,2 +0.36879637,0,0.5067,0,6,12,2 +0.36880795,0,0.5067,0,6,12,2 +0.36881954,0,0.5058731,0.0008269,6,12,3 +0.3688311,0,0.5058731,0.0008269,6,12,3 +0.36884267,0,0.5058731,0.0008269,6,12,3 +0.36885425,0,0.5058731,0.0008269,6,12,4 +0.36886582,0,0.5058731,0.0008269,6,12,4 +0.3688774,0,0.5058731,0.0008269,6,12,4 +0.36888897,0,0.5050462,0.0016538,6,12,5 +0.36890054,0,0.5050462,0.0016538,6,12,5 +0.36891212,0,0.5050462,0.0016538,6,12,5 +0.36892369,0,0.5050462,0.0016538,6,12,6 +0.36893526,0,0.5050462,0.0016538,6,12,6 +0.36894684,0,0.5050462,0.0016538,6,12,6 +0.36895841,0,0.5033924,0.0033076,6,12,7 +0.36896998,0,0.5033924,0.0033076,6,12,7 +0.36898156,0,0.5033924,0.0033076,6,12,7 +0.36899313,0,0.5033924,0.0033076,6,12,8 +0.36900471,0,0.5033924,0.0033076,6,12,8 +0.36901629,0,0.5033924,0.0033076,6,12,8 +0.36902785,0,0.5000848,0.0066152,6,12,9 +0.36903944,0,0.5000848,0.0066152,6,12,9 +0.36905102,0,0.5000848,0.0066152,6,12,9 +0.36906258,0,0.5000848,0.0066152,6,12,10 +0.36907415,0,0.5000848,0.0066152,6,12,10 +0.36908573,0,0.5000848,0.0066152,6,12,10 +0.3690973,0,0.4934696,0.0132304,6,12,11 +0.36910888,0,0.4934696,0.0132304,6,12,11 
+0.36912045,0,0.4934696,0.0132304,6,12,11 +0.36913202,0,0.4934696,0.0132304,6,12,12 +0.3691436,0,0.4934696,0.0132304,6,12,12 +0.36915518,0,0.4934696,0.0132304,6,12,12 +0.36916674,0,0.4802392,0.0264608,6,12,13 +0.36917832,0,0.4802392,0.0264608,6,12,13 +0.3691899,0,0.4802392,0.0264608,6,12,13 +0.36920146,0,0.4802392,0.0264608,6,12,14 +0.36921304,0,0.4802392,0.0264608,6,12,14 +0.36922462,0,0.4802392,0.0264608,6,12,14 +0.36923619,0,0.4802392,0.0264608,6,12,14 +0.36924776,0,0.5067,0,6,12,15 +0.36925934,0,0.5067,0,6,12,15 +0.36926282,0,0.5067,0,6,12,15 \ No newline at end of file diff --git a/tests/research/data/datalog_11-5-2019.tsv b/tests/research/data/datalog_11-5-2019.tsv new file mode 100644 index 00000000..09427881 --- /dev/null +++ b/tests/research/data/datalog_11-5-2019.tsv @@ -0,0 +1 @@ +Day fraction since midnight on 11/5/2019 7 kPa (cm) water flow (mL/s) coag flow (mL/s) max x + 1 () max cycles () cycles () 0.36840286 0 0.5067 0 6 12 0 0.36846073 0 0.5067 0 6 12 0 0.36854175 0 0.5067 0 6 12 0 0.36855332 0 0.5067 0 6 12 0 0.3685649 0 0.5067 0 6 12 1 0.36857646 0 0.5067 0 6 12 1 0.36858804 0 0.5067 0 6 12 1 0.36859962 0 0.5067 0 6 12 1 0.36861119 0 0.5067 0 6 12 1 0.36862276 0 0.5067 0 6 12 1 0.36863434 0 0.5067 0 6 12 1 0.36864591 0 0.5067 0 6 12 1 0.36865749 0 0.5067 0 6 12 1 0.36866906 0 0.5067 0 6 12 1 0.36868065 0 0.5067 0 6 12 1 0.36869221 0 0.5067 0 6 12 1 0.36870379 0 0.5067 0 6 12 1 0.36871537 0 0.5067 0 6 12 1 0.36872693 0 0.5067 0 6 12 1 0.36873852 0 0.5067 0 6 12 1 0.3687501 0 0.5067 0 6 12 1 0.36876166 0 0.5067 0 6 12 1 0.36877323 0 0.5067 0 6 12 1 0.36878481 0 0.5067 0 6 12 2 0.36879637 0 0.5067 0 6 12 2 0.36880795 0 0.5067 0 6 12 2 0.36881954 0 0.5058731 0.0008269 6 12 3 0.3688311 0 0.5058731 0.0008269 6 12 3 0.36884267 0 0.5058731 0.0008269 6 12 3 0.36885425 0 0.5058731 0.0008269 6 12 4 0.36886582 0 0.5058731 0.0008269 6 12 4 0.3688774 0 0.5058731 0.0008269 6 12 4 0.36888897 0 0.5050462 0.0016538 6 12 5 0.36890054 0 0.5050462 0.0016538 6 12 5 0.36891212 0 0.5050462 0.0016538 6 12 5 0.36892369 0 0.5050462 0.0016538 6 12 6 0.36893526 0 0.5050462 0.0016538 6 12 6 0.36894684 0 0.5050462 0.0016538 6 12 6 0.36895841 0 0.5033924 0.0033076 6 12 7 0.36896998 0 0.5033924 0.0033076 6 12 7 0.36898156 0 0.5033924 0.0033076 6 12 7 0.36899313 0 0.5033924 0.0033076 6 12 8 0.36900471 0 0.5033924 0.0033076 6 12 8 0.36901629 0 0.5033924 0.0033076 6 12 8 0.36902785 0 0.5000848 0.0066152 6 12 9 0.36903944 0 0.5000848 0.0066152 6 12 9 0.36905102 0 0.5000848 0.0066152 6 12 9 0.36906258 0 0.5000848 0.0066152 6 12 10 0.36907415 0 0.5000848 0.0066152 6 12 10 0.36908573 0 0.5000848 0.0066152 6 12 10 0.3690973 0 0.4934696 0.0132304 6 12 11 0.36910888 0 0.4934696 0.0132304 6 12 11 0.36912045 0 0.4934696 0.0132304 6 12 11 0.36913202 0 0.4934696 0.0132304 6 12 12 0.3691436 0 0.4934696 0.0132304 6 12 12 0.36915518 0 0.4934696 0.0132304 6 12 12 0.36916674 0 0.4802392 0.0264608 6 12 13 0.36917832 0 0.4802392 0.0264608 6 12 13 0.3691899 0 0.4802392 0.0264608 6 12 13 0.36920146 0 0.4802392 0.0264608 6 12 14 0.36921304 0 0.4802392 0.0264608 6 12 14 0.36922462 0 0.4802392 0.0264608 6 12 14 0.36923619 0 0.4802392 0.0264608 6 12 14 0.36924776 0 0.5067 0 6 12 15 0.36925934 0 0.5067 0 6 12 15 0.36926282 0 0.5067 0 6 12 15 \ No newline at end of file diff --git a/tests/research/data/datalog_6-16-2018.xls b/tests/research/data/datalog_6-16-2018.xls new file mode 100644 index 00000000..471589bc --- /dev/null +++ b/tests/research/data/datalog_6-16-2018.xls @@ -0,0 +1,3 @@ +Day fraction since midnight on 6/15/2018 
Sensor (cm) Temperature (C) Influent Turbidity (NTU) Effluent Turbidity () PumpControl(Clay) () drain control () Scale (g) +0.00005238 7.39367676 24.26708603 99.84 6.1 0.01503953 0 -999999 +0.00016812 7.2499795 24.26683617 100.55 6.12 0.01297213 0 -999999 \ No newline at end of file diff --git a/tests/research/data/statelog_11-5-2019.csv b/tests/research/data/statelog_11-5-2019.csv new file mode 100644 index 00000000..fe1fb055 --- /dev/null +++ b/tests/research/data/statelog_11-5-2019.csv @@ -0,0 +1,33 @@ +Day fraction since midnight on 11/5/2019, State ID, State name, Rule that caused previous state to end +0.36842392,0,OFF,Operator changed the state +0.3685598,2,Warmup,Operator changed the state +0.36875633,1,Run,go to run +0.36877994,2,Warmup,go to warmup +0.36879175,1,Run,go to run +0.36881547,2,Warmup,go to warmup +0.36882739,1,Run,go to run +0.3688509,2,Warmup,go to warmup +0.36886304,1,Run,go to run +0.36888666,2,Warmup,go to warmup +0.36889881,1,Run,go to run +0.36892231,2,Warmup,go to warmup +0.36893422,1,Run,go to run +0.36895784,2,Warmup,go to warmup +0.36896999,1,Run,go to run +0.36899372,2,Warmup,go to warmup +0.36900587,1,Run,go to run +0.36902936,2,Warmup,go to warmup +0.36904117,1,Run,go to run +0.36906502,2,Warmup,go to warmup +0.36907704,1,Run,go to run +0.36910055,2,Warmup,go to warmup +0.3691127,1,Run,go to run +0.36913631,2,Warmup,go to warmup +0.36914801,1,Run,go to run +0.36917172,2,Warmup,go to warmup +0.36918353,1,Run,go to run +0.36920725,2,Warmup,go to warmup +0.36921894,1,Run,go to run +0.36924256,2,Warmup,go to warmup +0.36925471,1,Run,go to run +0.3692686,0,OFF,Operator changed the state \ No newline at end of file diff --git a/tests/research/data/statelog_11-5-2019.tsv b/tests/research/data/statelog_11-5-2019.tsv new file mode 100644 index 00000000..c12ffd63 --- /dev/null +++ b/tests/research/data/statelog_11-5-2019.tsv @@ -0,0 +1 @@ +Day fraction since midnight on 11/5/2019 State ID State name Rule that caused previous state to end 0.36842392 0 OFF Operator changed the state 0.3685598 2 Warmup Operator changed the state 0.36875633 1 Run go to run 0.36877994 2 Warmup go to warmup 0.36879175 1 Run go to run 0.36881547 2 Warmup go to warmup 0.36882739 1 Run go to run 0.3688509 2 Warmup go to warmup 0.36886304 1 Run go to run 0.36888666 2 Warmup go to warmup 0.36889881 1 Run go to run 0.36892231 2 Warmup go to warmup 0.36893422 1 Run go to run 0.36895784 2 Warmup go to warmup 0.36896999 1 Run go to run 0.36899372 2 Warmup go to warmup 0.36900587 1 Run go to run 0.36902936 2 Warmup go to warmup 0.36904117 1 Run go to run 0.36906502 2 Warmup go to warmup 0.36907704 1 Run go to run 0.36910055 2 Warmup go to warmup 0.3691127 1 Run go to run 0.36913631 2 Warmup go to warmup 0.36914801 1 Run go to run 0.36917172 2 Warmup go to warmup 0.36918353 1 Run go to run 0.36920725 2 Warmup go to warmup 0.36921894 1 Run go to run 0.36924256 2 Warmup go to warmup 0.36925471 1 Run go to run 0.3692686 0 OFF Operator changed the state \ No newline at end of file diff --git a/tests/research/test_ProCoDA_Parser.py b/tests/research/test_ProCoDA_Parser.py index beb6cc35..b0b75223 100644 --- a/tests/research/test_ProCoDA_Parser.py +++ b/tests/research/test_ProCoDA_Parser.py @@ -3,47 +3,175 @@ """ import unittest -from aguaclara.research.procoda_parser import * +import aguaclara.research.procoda_parser as pp +from aguaclara.core.units import u +import pandas as pd +import numpy as np +import os +from matplotlib.testing.compare import compare_images +import matplotlib 
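+# Use a non-interactive backend so the plot-comparison tests can run headlessly (e.g. in CI).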
+matplotlib.use("Agg") +import matplotlib.pyplot as plt + class TestProCoDAParser(unittest.TestCase): - def test_get_data_by_time(self): - ''' - Extract column(s) of data between given starting and ending days and times - ''' - path = os.path.join(os.path.dirname(__file__), '.', 'data') + def test_column_of_data(self): + ''''' + Extract other columns of data and append units. + ''''' + path = os.path.join(os.path.dirname(__file__), '.', 'data', 'example datalog.xls') + answer = pp.column_of_data(path, 50, 1, units='mg/L') + answer = np.round(answer, 5) + self.assertSequenceEqual( + answer.tolist(), + np.round(np.array([ 21.61681747, 21.31163216, 20.80215263, 20.46752739, + 20.1048584 , 19.7037487 , 19.4194355 , 18.95934677, + 18.65832138, 18.24054337, 17.93864632, 17.591259 , + 17.25979805, 16.98148346, 16.60666656, 16.28514862, + 15.99366856, 15.72474861, 15.35812187, 15.11634636, + 14.75801468, 14.53341103, 14.20829868, 13.94124603, + 13.69845104, 13.42016983, 13.17064667, 12.94155121, + 12.66110611, 12.36821651, 12.1641016 , 11.91081715, + 11.69137764, 11.46448898, 11.2214098 , 11.03143692, + 10.78680801, 10.56936836, 10.36802101, 10.17097855, + 9.95537758, 9.78312111, 9.55150509, 9.3843832 , + 9.21883678, 9.03395939, 8.85475636, 8.68857765, + 8.47574997, 8.33256149, 8.13628197, 7.96697569, + 7.80458403, 7.68562984, 7.4511261 , 7.34629679, + 7.17365456, 7.03930044, 6.88661861, 6.73307562, + 6.60730886, 6.45987988, 6.30656338, 6.18089199, + 6.05378485, 5.90268421, 5.81327915, 5.68042564, + 5.57657337, 5.40122986, 5.33153057, 5.19660377, + 5.09033108, 4.96228552, 4.85437012, 4.76652002, + 4.66415834, 4.54592991, 4.43500376, 4.34614754, + 4.24292231, 4.16423607, 4.06328297, 3.96581864, + 3.88231015, 3.7828486 , 3.74253488, 3.62953901, + 3.53508115, 3.46755266, 3.36818004, 3.30672598, + 3.22161722, 3.13899183, 3.08345532, 2.98398542, + 2.94956589, 2.8504107 , 2.79215455, 2.72924852, + 2.66635823, 2.60831141, 2.53093195, 2.47217631, + 2.42190933, 2.36228228, 2.30094266, 2.24602866, + 2.19216943, 2.14143515, 2.10641694, 2.07170939, + 2.04412961, 2.0158174 , 2.00059986, 1.98546684, + 1.97646523, 1.96455812, 1.95887971, 1.94987118])*u('mg/L'), 5).tolist() + ) - data_day1 = pd.read_csv(path + '/datalog_6-14-2018.xls', delimiter='\t') - data_day1 = np.round([pd.to_numeric(data_day1.iloc[:, 0]), pd.to_numeric(data_day1.iloc[:, 4])], 5) - data_day1 = [data_day1[0].tolist(), data_day1[1].tolist()] + path = os.path.join(os.path.dirname(__file__), '.', 'data', 'example datalog.xls') + answer = pp.column_of_data(path, 50, "red dye (mg/L)", units='mg/L') + answer = np.round(answer, 5) + self.assertSequenceEqual( + answer.tolist(), + np.round(np.array([ 21.61681747, 21.31163216, 20.80215263, 20.46752739, + 20.1048584 , 19.7037487 , 19.4194355 , 18.95934677, + 18.65832138, 18.24054337, 17.93864632, 17.591259 , + 17.25979805, 16.98148346, 16.60666656, 16.28514862, + 15.99366856, 15.72474861, 15.35812187, 15.11634636, + 14.75801468, 14.53341103, 14.20829868, 13.94124603, + 13.69845104, 13.42016983, 13.17064667, 12.94155121, + 12.66110611, 12.36821651, 12.1641016 , 11.91081715, + 11.69137764, 11.46448898, 11.2214098 , 11.03143692, + 10.78680801, 10.56936836, 10.36802101, 10.17097855, + 9.95537758, 9.78312111, 9.55150509, 9.3843832 , + 9.21883678, 9.03395939, 8.85475636, 8.68857765, + 8.47574997, 8.33256149, 8.13628197, 7.96697569, + 7.80458403, 7.68562984, 7.4511261 , 7.34629679, + 7.17365456, 7.03930044, 6.88661861, 6.73307562, + 6.60730886, 6.45987988, 6.30656338, 6.18089199, + 6.05378485, 5.90268421, 
5.81327915, 5.68042564, + 5.57657337, 5.40122986, 5.33153057, 5.19660377, + 5.09033108, 4.96228552, 4.85437012, 4.76652002, + 4.66415834, 4.54592991, 4.43500376, 4.34614754, + 4.24292231, 4.16423607, 4.06328297, 3.96581864, + 3.88231015, 3.7828486 , 3.74253488, 3.62953901, + 3.53508115, 3.46755266, 3.36818004, 3.30672598, + 3.22161722, 3.13899183, 3.08345532, 2.98398542, + 2.94956589, 2.8504107 , 2.79215455, 2.72924852, + 2.66635823, 2.60831141, 2.53093195, 2.47217631, + 2.42190933, 2.36228228, 2.30094266, 2.24602866, + 2.19216943, 2.14143515, 2.10641694, 2.07170939, + 2.04412961, 2.0158174 , 2.00059986, 1.98546684, + 1.97646523, 1.96455812, 1.95887971, 1.94987118])*u('mg/L'), 5).tolist() + ) - data_day2 = pd.read_csv(path + '/datalog_6-15-2018.xls', delimiter='\t') - data_day2 = np.round([pd.to_numeric(data_day2.iloc[:, 0]), pd.to_numeric(data_day2.iloc[:, 4])], 5) - data_day2 = [data_day2[0].tolist(), data_day2[1].tolist()] - data_day2[0][0] = 0 # to remove scientific notation "e-" - # SINGLE COLUMN, ONE DAY - output = get_data_by_time(path=path, columns=0, dates="6-14-2018", start_time="12:20", end_time="13:00", extension=".xls") - self.assertSequenceEqual(np.round(output, 5).tolist(), data_day1[0][1041:1282]) + def test_column_of_time(self): + ''''' + Extract the time column from a data file. + ''''' + path = os.path.join(os.path.dirname(__file__), '.', 'data', 'example datalog.xls') + answer = pp.column_of_time(path, 50) + answer = np.round(answer, 5) + self.assertSequenceEqual( + answer.tolist(), + np.round(np.array([0.00000000e+00, 5.78662000e-05, 1.15725500e-04, + 1.73586900e-04, 2.31470400e-04, 2.89325100e-04, + 3.47199600e-04, 4.05070800e-04, 4.62941200e-04, + 5.20805100e-04, 5.78682300e-04, 6.36541000e-04, + 6.94405500e-04, 7.52295200e-04, 8.10152600e-04, + 8.68025100e-04, 9.25879200e-04, 9.83766900e-04, + 1.04163170e-03, 1.09949610e-03, 1.15736260e-03, + 1.21522990e-03, 1.27310590e-03, 1.33096560e-03, + 1.38884810e-03, 1.44671260e-03, 1.50456890e-03, + 1.56244910e-03, 1.62031940e-03, 1.67819090e-03, + 1.73605480e-03, 1.79390590e-03, 1.85178640e-03, + 1.90965780e-03, 1.96752080e-03, 2.02538760e-03, + 2.08325540e-03, 2.14113380e-03, 2.19899280e-03, + 2.25686180e-03, 2.31473400e-03, 2.37261100e-03, + 2.43048170e-03, 2.48834570e-03, 2.54620210e-03, + 2.60408890e-03, 2.66194550e-03, 2.71981170e-03, + 2.77768240e-03, 2.83556180e-03, 2.89342620e-03, + 2.95130110e-03, 3.00916580e-03, 3.06704400e-03, + 3.12490300e-03, 3.18278490e-03, 3.24064920e-03, + 3.29852180e-03, 3.35638230e-03, 3.41425150e-03, + 3.47212870e-03, 3.52999870e-03, 3.58786830e-03, + 3.64572740e-03, 3.70359810e-03, 3.76146930e-03, + 3.81933520e-03, 3.87721010e-03, 3.93506860e-03, + 3.99295440e-03, 4.05082240e-03, 4.10868470e-03, + 4.16654890e-03, 4.22442890e-03, 4.28230160e-03, + 4.34016650e-03, 4.39804130e-03, 4.45591720e-03, + 4.51377060e-03, 4.57164920e-03, 4.62952340e-03, + 4.68739510e-03, 4.74524320e-03, 4.80312930e-03, + 4.86098350e-03, 4.91887450e-03, 4.97673430e-03, + 5.03459310e-03, 5.09248050e-03, 5.15033640e-03, + 5.20820950e-03, 5.26607440e-03, 5.32394690e-03, + 5.38181660e-03, 5.43967960e-03, 5.49755470e-03, + 5.55543130e-03, 5.61330110e-03, 5.67117330e-03, + 5.72903190e-03, 5.78690100e-03, 5.84477570e-03, + 5.90264880e-03, 5.96051240e-03, 6.01837960e-03, + 6.07625150e-03, 6.13413050e-03, 6.19199110e-03, + 6.24987260e-03, 6.30772900e-03, 6.36560880e-03, + 6.42346920e-03, 6.48135320e-03, 6.53921020e-03, + 6.59709090e-03, 6.65494290e-03, 6.71281870e-03, + 6.77069570e-03, 6.82855640e-03, 6.88642010e-03])*u.day, 
5).tolist() + ) - # SINGLE COLUMN, TWO DAYS - output = get_data_by_time(path=path, columns=0, dates=["6-14-2018", "6-15-2018"], - start_time="12:20", end_time="10:50", extension=".xls") - time_column = data_day1[0][1041:] + np.round(np.array(data_day2[0][:3901])+1, 5).tolist() - self.assertSequenceEqual(np.round(output, 5).tolist(), time_column) + answer = pp.column_of_time(path, 50, end=60, units='hr') + answer = np.round(answer, 5) + self.assertSequenceEqual( + answer.tolist(), + np.round(np.array([0.00000000e+00, 5.78662000e-05, 1.15725500e-04, + 1.73586900e-04, 2.31470400e-04, 2.89325100e-04, + 3.47199600e-04, 4.05070800e-04, 4.62941200e-04, + 5.20805100e-04])*24*u.hr, 5).tolist() + ) - # MULTI COLUMN, ONE DAY - output = get_data_by_time(path=path, columns=[0, 4], dates=["6-14-2018"], start_time="12:20", - end_time="13:00", extension=".xls") - self.assertSequenceEqual(np.round(output[0], 5).tolist(), data_day1[0][1041:1282]) - self.assertSequenceEqual(np.round(output[1], 5).tolist(), data_day1[1][1041:1282]) - # MULTI COLUMN, TWO DAYS - output = get_data_by_time(path=path, columns=[0, 4], dates=["6-14-2018", "6-15-2018"], - start_time="12:20", end_time="10:50", extension=".xls") - time_column = data_day1[0][1041:] + np.round(np.array(data_day2[0][:3901])+1, 5).tolist() - self.assertSequenceEqual(np.round(output[0], 5).tolist(), time_column) - self.assertSequenceEqual(np.round(output[1], 5).tolist(), data_day1[1][1041:]+data_day2[1][:3901]) + def test_notes(self): + ''''' + Test function that extracts meta information from data file. + ''''' + path = os.path.join(os.path.dirname(__file__), '.', 'data', 'example datalog.xls') + answer = pp.notes(path)['Day fraction since midnight on '] + x = pd.DataFrame(index=[1, 29, 35], + columns=['Day fraction since midnight on ', 'red dye (mg/L)', 'Run Pump ()', 'Pump ()']) + x.iloc[0][0] = 'Start' + x.iloc[1][0] = 'Start' + x.iloc[2][0] = '30 mg/L' + self.assertSequenceEqual( + answer.tolist(), + x['Day fraction since midnight on '].tolist()) + def test_remove_notes(self): ''' @@ -51,7 +179,7 @@ def test_remove_notes(self): ''' path = os.path.join(os.path.dirname(__file__), '.', 'data') - output = remove_notes(pd.read_csv(path + '/example datalog.xls', delimiter='\t')) + output = pp.remove_notes(pd.read_csv(path + '/example datalog.xls', delimiter='\t')) self.assertSequenceEqual(np.round(pd.to_numeric(output.iloc[:, 0]), 5).tolist(), np.round(np.array( [0.6842773323, 0.6843351954, 0.6843930789, 0.6844509555, 0.6845088278, @@ -90,21 +218,138 @@ def test_remove_notes(self): 0.6938257774, 0.6938836411]), 5).tolist()) + def test_get_data_by_time(self): + ''' + Extract column(s) of data between given starting and ending days and times + ''' + path = os.path.join(os.path.dirname(__file__), '.', 'data') + + data_day1 = pd.read_csv(path + '/datalog_6-14-2018.xls', delimiter='\t') + data_day1 = np.round([pd.to_numeric(data_day1.iloc[:, 0]), pd.to_numeric(data_day1.iloc[:, 4])], 5) + data_day1 = [data_day1[0].tolist(), data_day1[1].tolist()] + + data_day2 = pd.read_csv(path + '/datalog_6-15-2018.xls', delimiter='\t') + data_day2 = np.round([pd.to_numeric(data_day2.iloc[:, 0]), pd.to_numeric(data_day2.iloc[:, 4])], 5) + data_day2 = [data_day2[0].tolist(), data_day2[1].tolist()] + data_day2[0][0] = 0 # to remove scientific notation "e-" + + # SINGLE COLUMN, ONE DAY + output = pp.get_data_by_time(path=path, columns=0, dates="6-14-2018", start_time="12:20", + end_time="13:00", extension=".xls") + self.assertSequenceEqual(np.round(output, 5).tolist(), 
data_day1[0][1041:1282]) + + # SINGLE COLUMN, TWO DAYS + output = pp.get_data_by_time(path=path, columns=0, dates=["6-14-2018", "6-15-2018"], + start_time="12:20", end_time="10:50", extension=".xls") + time_column = data_day1[0][1041:] + np.round(np.array(data_day2[0][:3901])+1, 5).tolist() + self.assertSequenceEqual(np.round(output, 5).tolist(), time_column) + + # MULTI COLUMN, ONE DAY + output = pp.get_data_by_time(path=path, columns=[0, 4], dates=["6-14-2018"], start_time="12:20", + end_time="13:00", extension=".xls") + self.assertSequenceEqual(np.round(output[0], 5).tolist(), data_day1[0][1041:1282]) + self.assertSequenceEqual(np.round(output[1], 5).tolist(), data_day1[1][1041:1282]) + + # MULTI COLUMN, TWO DAYS + output = pp.get_data_by_time(path=path, columns=[0, 4], dates=["6-14-2018", "6-15-2018"], + start_time="12:20", end_time="10:50", extension=".xls") + time_column = data_day1[0][1041:] + np.round(np.array(data_day2[0][:3901])+1, 5).tolist() + self.assertSequenceEqual(np.round(output[0], 5).tolist(), time_column) + self.assertSequenceEqual(np.round(output[1], 5).tolist(), data_day1[1][1041:]+data_day2[1][:3901]) + + # MULTI COLUMN, TWO DAYS, WITH UNITS + output = pp.get_data_by_time(path=path, columns=[0, 4], dates=["6-14-2018", "6-15-2018"], + start_time="12:20", end_time="10:50", extension=".xls", units=['day', 'mg/L']) + time_column = data_day1[0][1041:] + np.round(np.array(data_day2[0][:3901])+1, 5).tolist() + self.assertEqual(output[0].units, u.day) + self.assertSequenceEqual(np.round(output[0].magnitude, 5).tolist(), time_column) + self.assertEqual(output[1].units, u.mg/u.L) + self.assertSequenceEqual(np.round(output[1].magnitude, 5).tolist(), data_day1[1][1041:]+data_day2[1][:3901]) + + ######## WITH ELAPSED TIME ######## + start = pp.day_fraction("12:20") + data_day1 = pd.read_csv(path + '/datalog_6-14-2018.xls', delimiter='\t') + data_day1 = [np.round(pd.to_numeric(data_day1.iloc[:, 0]) - start, 5).tolist(), + np.round(pd.to_numeric(data_day1.iloc[:, 4]), 5).tolist()] + + data_day2 = pd.read_csv(path + '/datalog_6-15-2018.xls', delimiter='\t') + data_day2.iloc[0,0] = 0 # to remove scientific notation "e-" + data_day2 = [np.round(pd.to_numeric(data_day2.iloc[:, 0]) - start + 1, 5).tolist(), + np.round(pd.to_numeric(data_day2.iloc[:, 4]), 5).tolist()] + + # SINGLE COLUMN, ONE DAY + output = pp.get_data_by_time(path=path, columns=0, dates="6-14-2018", start_time="12:20", + end_time="13:00", extension=".xls", elapsed=True) + self.assertSequenceEqual(np.round(output, 5).tolist(), data_day1[0][1041:1282]) + + # MULTI COLUMN, TWO DAYS + output = pp.get_data_by_time(path=path, columns=[0, 4], dates=["6-14-2018", "6-15-2018"], + start_time="12:20", end_time="10:50", extension=".xls", + elapsed=True) + self.assertSequenceEqual(np.round(output[0], 5).tolist(), data_day1[0][1041:]+data_day2[0][:3901]) + self.assertSequenceEqual(np.round(output[1], 5).tolist(), data_day1[1][1041:]+data_day2[1][:3901]) + + + def test_day_fraction(self): + ''' + Converts time into a fraction of the day + ''' + time = pp.day_fraction(time="12:00") + self.assertEqual(time, 0.5) + + + def test_data_from_dates(self): + ''' + Return a list of DataFrames representing the ProCoDA data files stored in the given path and recorded on the given dates. 
+ ''' + path = os.path.join(os.path.dirname(__file__), '.', 'data') + dataFromPath = pd.read_csv(path + '/datalog_6-15-2018.xls', delimiter='\t') + + getDataFromDates = pp.data_from_dates(path=path, dates='6-15-2018', extension=".xls")[0] + + self.assertTrue(getDataFromDates.equals(dataFromPath)) + + + def test_column_start_to_end(self): + ''' + Return entries in column from starting index in first DataFrame to ending index in last DataFrame + ''' + #One DataFrame + path = os.path.join(os.path.dirname(__file__), '.', 'data') + data_manual1 = pd.read_csv(path + '/datalog_6-14-2018.xls', delimiter='\t') + + getColData1 = pp.column_start_to_end(data=[data_manual1], column=1, start_idx=2, end_idx=7) + compareColData1 = [-4.34825945, -2.3821919, -2.57200098, -2.40549088, + -1.00214481] + self.assertSequenceEqual(getColData1, compareColData1) + + #Three DataFrames + data_manual2 = pd.read_csv(path + '/datalog_6-16-2018.xls', delimiter='\t') + data_manual3 = pd.read_csv(path + '/datalog_6-15-2018.xls', delimiter='\t') + + getColData2 = pp.column_start_to_end([data_manual1, data_manual2, data_manual3], + column=2, start_idx=5238, end_idx=2) + compareColData2 = [24.26625443, 24.2669487, 24.26613235, 24.26708603, + 24.26683617, 24.26708603, 24.26683617] + self.assertSequenceEqual(getColData2, compareColData2) + + def test_get_data_by_state(self): ''' Extract the time column and a data column for each iteration of a state ''' path = os.path.join(os.path.dirname(__file__), '.', 'data') - output = get_data_by_state(path, dates=["6-19-2013"], state=1, column=1, extension=".xls") # , "6-20-2013" + # Local path + output = pp.get_data_by_state(path, dates="6-19-2013", state=1, column=1, extension=".xls") # , "6-20-2013" datafile = pd.read_csv(path + "/datalog_6-19-2013.xls", delimiter='\t') time_and_data1 = np.array([pd.to_numeric(datafile.iloc[:, 0]), np.round(pd.to_numeric(datafile.iloc[:, 1]), 5)]) start_time = time_and_data1[0, 0] - - answer = [time_and_data1[:, 98:175], time_and_data1[:, 220:485], time_and_data1[:, 3039:3304], - time_and_data1[:, 5858:6123], time_and_data1[:, 8677:8942], time_and_data1[:, 11496:11761], + answer = [time_and_data1[:, 98:175], time_and_data1[:, 220:485], + time_and_data1[:, 3039:3304], time_and_data1[:, 5858:6123], + time_and_data1[:, 8677:8942], time_and_data1[:, 11496:11761], time_and_data1[:, 14315:14580]] for i in range(len(output)): @@ -112,168 +357,158 @@ def test_get_data_by_state(self): self.assertSequenceEqual([j[0] for j in output_i], [round(j-start_time, 5) for j in answer[i][0]]) self.assertSequenceEqual([j[1] for j in output_i], [j for j in answer[i][1]]) - def test_column_of_time(self): - ''''' - Extract the time column from a data file. 
- ''''' - path = os.path.join(os.path.dirname(__file__), '.', 'data', 'example datalog.xls') - answer = column_of_time(path, 50) - answer = np.round(answer, 5) - self.assertSequenceEqual( - answer.tolist(), - np.round(np.array([0.00000000e+00, 5.78662000e-05, 1.15725500e-04, - 1.73586900e-04, 2.31470400e-04, 2.89325100e-04, - 3.47199600e-04, 4.05070800e-04, 4.62941200e-04, - 5.20805100e-04, 5.78682300e-04, 6.36541000e-04, - 6.94405500e-04, 7.52295200e-04, 8.10152600e-04, - 8.68025100e-04, 9.25879200e-04, 9.83766900e-04, - 1.04163170e-03, 1.09949610e-03, 1.15736260e-03, - 1.21522990e-03, 1.27310590e-03, 1.33096560e-03, - 1.38884810e-03, 1.44671260e-03, 1.50456890e-03, - 1.56244910e-03, 1.62031940e-03, 1.67819090e-03, - 1.73605480e-03, 1.79390590e-03, 1.85178640e-03, - 1.90965780e-03, 1.96752080e-03, 2.02538760e-03, - 2.08325540e-03, 2.14113380e-03, 2.19899280e-03, - 2.25686180e-03, 2.31473400e-03, 2.37261100e-03, - 2.43048170e-03, 2.48834570e-03, 2.54620210e-03, - 2.60408890e-03, 2.66194550e-03, 2.71981170e-03, - 2.77768240e-03, 2.83556180e-03, 2.89342620e-03, - 2.95130110e-03, 3.00916580e-03, 3.06704400e-03, - 3.12490300e-03, 3.18278490e-03, 3.24064920e-03, - 3.29852180e-03, 3.35638230e-03, 3.41425150e-03, - 3.47212870e-03, 3.52999870e-03, 3.58786830e-03, - 3.64572740e-03, 3.70359810e-03, 3.76146930e-03, - 3.81933520e-03, 3.87721010e-03, 3.93506860e-03, - 3.99295440e-03, 4.05082240e-03, 4.10868470e-03, - 4.16654890e-03, 4.22442890e-03, 4.28230160e-03, - 4.34016650e-03, 4.39804130e-03, 4.45591720e-03, - 4.51377060e-03, 4.57164920e-03, 4.62952340e-03, - 4.68739510e-03, 4.74524320e-03, 4.80312930e-03, - 4.86098350e-03, 4.91887450e-03, 4.97673430e-03, - 5.03459310e-03, 5.09248050e-03, 5.15033640e-03, - 5.20820950e-03, 5.26607440e-03, 5.32394690e-03, - 5.38181660e-03, 5.43967960e-03, 5.49755470e-03, - 5.55543130e-03, 5.61330110e-03, 5.67117330e-03, - 5.72903190e-03, 5.78690100e-03, 5.84477570e-03, - 5.90264880e-03, 5.96051240e-03, 6.01837960e-03, - 6.07625150e-03, 6.13413050e-03, 6.19199110e-03, - 6.24987260e-03, 6.30772900e-03, 6.36560880e-03, - 6.42346920e-03, 6.48135320e-03, 6.53921020e-03, - 6.59709090e-03, 6.65494290e-03, 6.71281870e-03, - 6.77069570e-03, 6.82855640e-03, 6.88642010e-03])*u.day, 5).tolist() - ) + # Acceptable URL + url_acceptable = 'https://raw.githubusercontent.com/monroews/playing/master/ProCoDA_data' + output = pp.get_data_by_state(url_acceptable, dates="11-5-2019", state=1, column=1, extension='.tsv') + answer = pp.get_data_by_state(path, dates="11-5-2019", state=1, column=1, extension='.tsv') + for i in range(len(output)): + self.assertSequenceEqual([round(o, 5) for o in output[i][:,0]], [round(a, 5) for a in answer[i][:,0]]) + self.assertSequenceEqual([round(o, 5) for o in output[i][:,1]], [round(a, 5) for a in answer[i][:,1]]) - def test_column_of_data(self): - ''''' - Extract other columns of data and append units. 
- ''''' - path = os.path.join(os.path.dirname(__file__), '.', 'data', 'example datalog.xls') - answer = column_of_data(path, 50, 1, units='mg/L') - answer = np.round(answer, 5) - self.assertSequenceEqual( - answer.tolist(), - np.round(np.array([ 21.61681747, 21.31163216, 20.80215263, 20.46752739, - 20.1048584 , 19.7037487 , 19.4194355 , 18.95934677, - 18.65832138, 18.24054337, 17.93864632, 17.591259 , - 17.25979805, 16.98148346, 16.60666656, 16.28514862, - 15.99366856, 15.72474861, 15.35812187, 15.11634636, - 14.75801468, 14.53341103, 14.20829868, 13.94124603, - 13.69845104, 13.42016983, 13.17064667, 12.94155121, - 12.66110611, 12.36821651, 12.1641016 , 11.91081715, - 11.69137764, 11.46448898, 11.2214098 , 11.03143692, - 10.78680801, 10.56936836, 10.36802101, 10.17097855, - 9.95537758, 9.78312111, 9.55150509, 9.3843832 , - 9.21883678, 9.03395939, 8.85475636, 8.68857765, - 8.47574997, 8.33256149, 8.13628197, 7.96697569, - 7.80458403, 7.68562984, 7.4511261 , 7.34629679, - 7.17365456, 7.03930044, 6.88661861, 6.73307562, - 6.60730886, 6.45987988, 6.30656338, 6.18089199, - 6.05378485, 5.90268421, 5.81327915, 5.68042564, - 5.57657337, 5.40122986, 5.33153057, 5.19660377, - 5.09033108, 4.96228552, 4.85437012, 4.76652002, - 4.66415834, 4.54592991, 4.43500376, 4.34614754, - 4.24292231, 4.16423607, 4.06328297, 3.96581864, - 3.88231015, 3.7828486 , 3.74253488, 3.62953901, - 3.53508115, 3.46755266, 3.36818004, 3.30672598, - 3.22161722, 3.13899183, 3.08345532, 2.98398542, - 2.94956589, 2.8504107 , 2.79215455, 2.72924852, - 2.66635823, 2.60831141, 2.53093195, 2.47217631, - 2.42190933, 2.36228228, 2.30094266, 2.24602866, - 2.19216943, 2.14143515, 2.10641694, 2.07170939, - 2.04412961, 2.0158174 , 2.00059986, 1.98546684, - 1.97646523, 1.96455812, 1.95887971, 1.94987118])*u('mg/L'), 5).tolist() - ) + # Github.com URL (blob) + url_github = 'https://github.com/monroews/playing/blob/master/ProCoDA_data' + output = pp.get_data_by_state(url_github, dates="11-5-2019", state=1, column=1, extension='.tsv') + for i in range(len(output)): + self.assertSequenceEqual([round(o, 5) for o in output[i][:,0]], [round(a, 5) for a in answer[i][:,0]]) + self.assertSequenceEqual([round(o, 5) for o in output[i][:,1]], [round(a, 5) for a in answer[i][:,1]]) + + # Github.com URL (tree) + url_github = 'https://github.com/monroews/playing/tree/master/ProCoDA_data' + output = pp.get_data_by_state(url_github, dates="11-5-2019", state=1, column=1, extension='.tsv') + for i in range(len(output)): + self.assertSequenceEqual([round(o, 5) for o in output[i][:,0]], [round(a, 5) for a in answer[i][:,0]]) + self.assertSequenceEqual([round(o, 5) for o in output[i][:,1]], [round(a, 5) for a in answer[i][:,1]]) + + + def test_plot_columns(self): + ''' + Plot the columns of data given the file located by labels + ''' + path = os.path.join(os.path.dirname(__file__), '.', 'data') + '/statelog_6-14-2018.xls' + + plt.figure() + pp.plot_columns(path=path, columns=" State ID") + plt.savefig("Image1.png") + plt.figure() + plt.plot([0,1,0,1,2]) + plt.savefig("Image2.png") + self.assertEqual(None, compare_images("Image2.png", "Image1.png", 0)) + + plt.figure() + pp.plot_columns(path=path, columns=" State ID", x_axis=" State ID") + plt.savefig("Image3.png") + plt.figure() + plt.plot([0,1,0,1,2], [0,1,0,1,2]) + plt.savefig("Image4.png") + self.assertEqual(None, compare_images("Image4.png", "Image3.png", 0)) + + plt.figure() + pp.plot_columns(path=path, columns=[" State ID"]) + plt.savefig("Image5.png") + self.assertEqual(None, compare_images("Image1.png", 
"Image5.png", 0)) + + plt.figure() + pp.plot_columns(path=path, columns=[" State ID"], x_axis=" State ID") + plt.savefig("Image6.png") + self.assertEqual(None, compare_images("Image4.png", "Image6.png", 0)) + + self.assertRaisesRegex(ValueError, 'columns must be a string or list of strings', + pp.plot_columns, *(path, 9)) + + os.remove("Image1.png") + os.remove("Image2.png") + os.remove("Image3.png") + os.remove("Image4.png") + os.remove("Image5.png") + os.remove("Image6.png") + + + def test_iplot_columns(self): + ''' + Plot the columns of data given the file located by indices + ''' + path = os.path.join(os.path.dirname(__file__), '.', 'data') + '/statelog_6-14-2018.xls' + + plt.figure() + pp.iplot_columns(path=path, columns=1) + plt.savefig("Image1.png") + plt.figure() + plt.plot([0,1,0,1,2]) + plt.savefig("Image2.png") + self.assertEqual(None, compare_images("Image2.png", "Image1.png", 0)) + + plt.figure() + pp.iplot_columns(path=path, columns=1, x_axis=1) + plt.savefig("Image3.png") + plt.figure() + plt.plot([0,1,0,1,2], [0,1,0,1,2]) + plt.savefig("Image4.png") + self.assertEqual(None, compare_images("Image4.png", "Image3.png", 0)) + + plt.figure() + pp.iplot_columns(path=path, columns=[1]) + plt.savefig("Image5.png") + self.assertEqual(None, compare_images("Image1.png", "Image5.png", 0)) + + plt.figure() + pp.iplot_columns(path=path, columns=[1], x_axis=1) + plt.savefig("Image6.png") + self.assertEqual(None, compare_images("Image4.png", "Image6.png", 0)) + + self.assertRaisesRegex(ValueError, 'columns must be an int or a list of ints', + pp.iplot_columns, *(path, ' State ID')) + + os.remove("Image1.png") + os.remove("Image2.png") + os.remove("Image3.png") + os.remove("Image4.png") + os.remove("Image5.png") + os.remove("Image6.png") - def test_notes(self): - ''''' - Test function that extracts meta information from data file. 

- def test_notes(self):
- '''''
- Test function that extracts meta information from data file.
- '''''
- path = os.path.join(os.path.dirname(__file__), '.', 'data', 'example datalog.xls')
- answer = notes(path)['Day fraction since midnight on ']
- x = pd.DataFrame(index=[1, 29, 35],
- columns=['Day fraction since midnight on ', 'red dye (mg/L)', 'Run Pump ()', 'Pump ()'])
- x.iloc[0][0] = 'Start'
- x.iloc[1][0] = 'Start'
- x.iloc[2][0] = '30 mg/L'
- self.assertSequenceEqual(
- answer.tolist(),
- x['Day fraction since midnight on '].tolist())

 def test_read_state(self):
 path = os.path.join(os.path.dirname(__file__), '.', 'data', '')
- time, data = read_state(["6-19-2013", "6-20-2013"], 1, 28, "mL/s", path,
- extension=".xls")
- time = np.round(time, 5)
- self.assertSequenceEqual(
- time.tolist()[1000:1100],
- np.round(
- [0.10189837999999996, 0.10190995999999997, 0.10192152999999993,
- 0.10193310999999994, 0.10194468000000001, 0.10195624999999997,
- 0.10196782999999998, 0.10197939999999994, 0.10199097999999995,
- 0.10200254999999991, 0.10201412999999993, 0.1020257,
- 0.10203726999999996, 0.10204884999999997, 0.10206041999999993,
- 0.10207199999999994, 0.10208357000000001, 0.10209513999999997,
- 0.10210671999999998, 0.10211828999999994, 0.10212986999999996,
- 0.10214143999999992, 0.10215300999999999,
- 0.10216459, 0.10217615999999996, 0.10218773999999997,
- 0.10219930999999993, 0.10221088, 0.1022224599999999,
- 0.10223402999999998, 0.10224560999999999, 0.10225717999999995,
- 0.10226874999999991, 0.10228032999999992, 0.10229189999999999,
- 0.10230348, 0.10231504999999996, 0.10232662999999997,
- 0.10233819999999993, 0.10234977, 0.1023613499999999,
- 0.10237291999999998, 0.10238449999999999, 0.10239606999999995,
- 0.10240763999999991, 0.10241921999999992, 0.10243079,
- 0.10244237, 0.10245393999999997, 0.10246550999999993,
- 0.10247708999999994, 0.10248866000000001, 0.10250023999999991,
- 0.10251180999999998, 0.10252337999999994, 0.10253495999999995,
- 0.10254652999999991, 0.10255810999999992, 0.10256968,
- 0.10258124999999996, 0.10259282999999997, 0.10260439999999993,
- 0.10261597999999994, 0.10262755000000001, 0.10263912999999991,
- 0.10265069999999998, 0.10266226999999994, 0.10267384999999996,
- 0.10268541999999992, 0.10269699999999993, 0.10270857,
- 0.10272013999999996, 0.10273171999999997, 0.10274328999999993,
- 0.10275486999999994, 0.1027664399999999, 0.10277800999999998,
- 0.10278958999999999, 0.10280115999999995, 0.10281273999999996,
- 0.10282430999999992, 0.10283587999999999, 0.10284746,
- 0.10285902999999996, 0.10287060999999997, 0.10288229999999998,
- 0.10289375, 0.1029054399999999, 0.10291701999999991,
- 0.10292858999999999, 0.10294017, 0.10295162999999996,
- 0.10296330999999992, 0.10297488999999993, 0.10298646,
- 0.1029980399999999, 0.10300960999999997, 0.10302106999999994,
- 0.10303275999999995, 0.10304421999999991]*u.day, 5).tolist()
- )
+ output_time, output_data = pp.read_state(["6-19-2013", "6-20-2013"], 1, 28, "mL/s", path, extension=".xls")
+
+ df_day1 = pd.read_csv(path + "/datalog_6-19-2013.xls", delimiter='\t')
+ df_day2 = pd.read_csv(path + "/datalog_6-20-2013.xls", delimiter='\t')
+
+ time_day1 = df_day1.iloc[:,0]
+ data_day1 = df_day1.iloc[:,28]
+ time_day2 = df_day2.iloc[:,0] + 1
+ data_day2 = df_day2.iloc[:,28]
+
+ answer_time = pd.concat([
+ time_day1[98:175], time_day1[220:485], time_day1[3039:3304],
+ time_day1[5858:6123], time_day1[8677:8942], time_day1[11496:11761],
+ time_day1[14315:14580],
+ time_day2[1442:1707], time_day2[4261:4526], time_day2[7080:7345],
+ time_day2[9899:10164], time_day2[12718:12983], time_day2[36572:40549],
+ time_day2[41660:41694], time_day2[41696:41698]
+ ]) - time_day1.iloc[0]
+
+ answer_data = pd.concat([
+ data_day1[98:175], data_day1[220:485], data_day1[3039:3304],
+ data_day1[5858:6123], data_day1[8677:8942], data_day1[11496:11761],
+ data_day1[14315:14580],
+ data_day2[1442:1707], data_day2[4261:4526], data_day2[7080:7345],
+ data_day2[9899:10164], data_day2[12718:12983], data_day2[36572:40549],
+ data_day2[41660:41694], data_day2[41696:41698]
+ ])
+
+ self.assertEqual(output_time.units, u.day)
+ self.assertSequenceEqual(list(output_time.magnitude), list(answer_time))
+ self.assertEqual(output_data.units, u.mL/u.s)
+ self.assertSequenceEqual(list(output_data.magnitude), list(answer_data))
- self.assertSequenceEqual(
- data.tolist()[1000:1100],
- [5.4209375*u.mL/u.s for number in range(100)]
- )
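The rewritten test_read_state builds its expected times by hand: day-2 day fractions are shifted by one full day, and the concatenated series is re-zeroed on the first day-1 timestamp. A toy example of that arithmetic (values made up):

    import numpy as np

    day1 = np.array([0.10, 0.20, 0.30])   # day fractions from datalog 1
    day2 = np.array([0.05, 0.15]) + 1     # datalog 2 is offset by one day
    elapsed = np.concatenate([day1, day2]) - day1[0]
    # elapsed -> [0.0, 0.1, 0.2, 0.95, 1.05] (days)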

 def test_average_state(self):
 path = os.path.join(os.path.dirname(__file__), '.', 'data', '')
- avgs = average_state(["6-19-2013", "6-20-2013"], 1, 28, "mL/s", path,
+ avgs = pp.average_state(["6-19-2013", "6-20-2013"], 1, 28, "mL/s", path,
 extension=".xls")
 avgs = np.round(avgs, 5)
+
 self.assertSequenceEqual(
 avgs.tolist(),
 [5.5, 5.5, 5.5, 5.43125, 5.42094, 5.40908, 5.39544, 5.37976, 5.36172,
@@ -291,7 +526,7 @@ def avg_with_units(lst):

 return acc / num

- avgs = perform_function_on_state(avg_with_units,
+ avgs = pp.perform_function_on_state(avg_with_units,
 ["6-19-2013", "6-20-2013"], 1, 28, "mL/s", path,
 extension=".xls")
 avgs = np.round(avgs, 5)
@@ -312,7 +547,7 @@ def avg_with_units(lst):

 return acc / num

- ids, answer = read_state_with_metafile(avg_with_units, 1, 28, path, [], ".xls", "mg/L")
+ ids, answer = pp.read_state_with_metafile(avg_with_units, 1, 28, path, [], ".xls", "mg/L")

 self.assertSequenceEqual(["1", "2"], ids.tolist())
 self.assertSequenceEqual([5.445427082723495, 5.459751965314751]*u.mg/u.L, answer)
@@ -329,7 +564,7 @@ def avg_with_units(lst):

 return acc / num

- output = write_calculations_to_csv(avg_with_units, 1, 28, path,
+ output = pp.write_calculations_to_csv(avg_with_units, 1, 28, path,
 ["Average Conc (mg/L)"], out_path,
 extension=".xls")

@@ -337,3 +572,49 @@ def avg_with_units(lst):
 self.assertSequenceEqual(
 [5.445427082723495, 5.459751965314751],
 output['Average Conc (mg/L)'].tolist())
+
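The hunks above all pass the same avg_with_units callback, of which only the trailing return acc / num survives as diff context. A sketch consistent with those fragments, assuming a unit-preserving mean over a list of Pint quantities:

    def avg_with_units(lst):
        # Summing quantities (e.g. mL/s) keeps their units attached;
        # dividing by the count yields a mean with the same units.
        num = len(lst)
        acc = 0
        for quantity in lst:
            acc = acc + quantity
        return acc / num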
+ def test_intersect(self):
+ # tests one crossing
+ x = np.array([1,2,3])
+ y1 = np.array([2,6,8])
+ y2 = np.array([6,2,3])
+ output = pp.intersect(x, y1, y2)
+ expected = (np.array([1.5]), np.array([4]), np.array([1]))
+ for i in range(len(expected)):
+ self.assertSequenceEqual(list(expected[i]), list(output[i]))
+
+ # tests two crossings
+ x = np.array([1,2,3,4,5])
+ y1 = np.array([2,6,8,4,1])
+ y2 = np.array([6,2,3,7,6])
+ output = pp.intersect(x, y1, y2)
+ expected = (np.array([1.5, 3.625]), np.array([4, 5.5]), np.array([1, 3]))
+ for i in range(len(expected)):
+ self.assertSequenceEqual(list(expected[i]), list(output[i]))
+
+ # tests parallel lines
+ x = np.array([1,2,3,4])
+ y1 = np.array([3,5,7,9])
+ y2 = np.array([5,7,9,11])
+ output = pp.intersect(x, y1, y2)
+ expected = (np.array([]), np.array([]), np.array([]))
+ for i in range(len(expected)):
+ self.assertSequenceEqual(list(expected[i]), list(output[i]))
+
+ # tests equal and crossing
+ x = np.array([-2,-1,0,1,2])
+ y1 = np.array([2,1,0,-1,-2])
+ y2 = np.array([-2,-1,0,1,2])
+ output = pp.intersect(x, y1, y2)
+ expected = (np.array([-0, -0]), np.array([0, 0]), np.array([2, 3]))
+ for i in range(len(expected)):
+ self.assertSequenceEqual(list(expected[i]), list(output[i]))
+
+ # tests equal and not crossing
+ x = np.array([0,1,2,3,4])
+ y1 = np.array([4,4,4,4,4])
+ y2 = np.array([3,4,3,0,-5])
+ output = pp.intersect(x, y1, y2)
+ expected = (np.array([1, 1]), np.array([4, 4]), np.array([1, 2]))
+ for i in range(len(expected)):
+ self.assertSequenceEqual(list(expected[i]), list(output[i]))
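Taken together, the test_intersect cases pin down the contract of pp.intersect: for each sign change of y1 - y2 (touches included), return the linearly interpolated crossing coordinates and the index of the first point after the crossing. A minimal sketch that satisfies all five cases above, not necessarily the shipped implementation:

    import numpy as np

    def intersect_sketch(x, y1, y2):
        d = y1 - y2
        # Last index before each sign change of the difference (touches count).
        idx = np.nonzero(np.diff(np.sign(d)))[0]
        t = d[idx] / (d[idx] - d[idx + 1])        # interpolation fraction
        x_cross = x[idx] + t * (x[idx + 1] - x[idx])
        y_cross = y1[idx] + t * (y1[idx + 1] - y1[idx])
        return x_cross, y_cross, idx + 1          # index after each crossing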