From 4f25d82cfc58bc1cfd5f76db2923af9565efb8d4 Mon Sep 17 00:00:00 2001
From: Phillip Romero <pvtr1998@gmail.com>
Date: Wed, 5 Jun 2024 12:52:13 -0700
Subject: [PATCH] vis: add label_bad_module and import etl as a top-level module

---
 alfred/vis.py | 132 +++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 131 insertions(+), 1 deletion(-)

diff --git a/alfred/vis.py b/alfred/vis.py
index 42f12a3..0e919f2 100644
--- a/alfred/vis.py
+++ b/alfred/vis.py
@@ -6,7 +6,7 @@
 import re
 from os import listdir
 
-from .etl import find_directory, sort_bus_by_date
+from etl import find_directory, sort_bus_by_date
 
 # def find_directory():
 #    '''
@@ -680,3 +680,133 @@ def swapped_mod_dataframes(directory, serial_num, characteristic):
                                 index.name = title
                                 list_desired_dfs.append(df_characteristic)
     return list_desired_dfs
+
+def label_bad_module(directory):
+    '''
+    Flags module serial-number changes between consecutive CSV files of
+    each bus. Outputs a DataFrame that is used for heatmap visualizations.
+
+    Every folder in directory whose name starts with 'bus' is treated as
+    a bus. Its CSV files are ordered with sort_bus_by_date and the serial
+    numbers of consecutive files are compared position by position.
+    Module N is flagged in a file when the serial at position N differs
+    from the one in the previous file of the same bus. The first file of
+    a bus has no predecessor, so all of its modules get a change of 0.
+
+    The CSV files are read from directory/<bus folder>/<Filename>, where
+    Filename comes from the DataFrame returned by sort_bus_by_date.
+
+    Parameters:
+    - directory (str): The directory where the files sorted by bus can be
+      found.
+
+    Returns:
+    - pandas.DataFrame: Long-format table with the columns 'Bus',
+      'Module', 'Date' and 'Change'. It holds one row per bus, module and
+      CSV file, ordered by bus folder, then module, then file order.
+      'Module' is the module number as a string and 'Change' is 0 or 1.
+      An empty DataFrame with these columns is returned when no bus
+      folder contains any CSV file.
+    '''
+    import os  # Local import keeps this function self-contained
+
+    column_names = ['Bus', 'Module', 'Date', 'Change']
+    num_mods = 16  # Constant number of mods
+    date_format = '%m/%d/%Y, %H:%M:%S'
+    list_df = []  # List of dataframes for each bus
+
+    # Bus folders are the entries whose name starts with 'bus'
+    bus_folders = [name for name in listdir(directory)
+                   if name.startswith('bus')]
+    for bus in bus_folders:
+        df_sorted = sort_bus_by_date(directory, bus + '/')
+        ordered_csvs = df_sorted['Filename'].tolist()
+        if not ordered_csvs:
+            # A bus without CSV files contributes no rows
+            continue
+        # Dates are formatted once here and reused for every module
+        ordered_dates = [stamp.strftime(date_format)
+                         for stamp in df_sorted['DateRetrieved'].tolist()]
+        # One list of module serials per CSV file, in the sorted order
+        file_serials = [
+            _read_module_serials(os.path.join(directory, bus, csv_name))
+            for csv_name in ordered_csvs
+        ]
+        rows = _module_change_rows(bus, ordered_dates, file_serials,
+                                   num_mods)
+        list_df.append(pd.DataFrame(rows, columns=column_names))
+
+    if not list_df:
+        # pd.concat raises on an empty list, so build the empty frame
+        return pd.DataFrame(columns=column_names)
+    return pd.concat(list_df, axis=0)
+
+
+def _read_module_serials(path, keyword='Mfg Data (ASCII)', serial_start=17):
+    '''
+    Reads the module serial numbers stored in one CSV file.
+
+    Every cell containing keyword yields one serial: the cell text from
+    index serial_start onwards with all non-word characters removed and
+    lower-cased. The default of 17 skips the 16-character keyword and one
+    more character.
+    NOTE(review): this assumes the keyword starts the cell -- confirm.
+
+    Parameters:
+    - path (str): Path of the CSV file to read.
+    - keyword (str): Text that marks a cell holding a serial number.
+    - serial_start (int): Index in the cell where the serial text starts.
+
+    Returns:
+    - list of str: The serials in file order, without the first match.
+    '''
+    serials = []
+    # newline='' is how the csv module expects files to be opened
+    with open(path, newline='') as file:
+        for row in csv.reader(file):
+            for element in row:
+                if keyword in element:
+                    raw_serial = element[serial_start:]
+                    serials.append(re.sub(r'\W+', '', raw_serial).lower())
+    # The first match is discarded (presumably it is not a module serial,
+    # TODO confirm). Slicing instead of pop(0) also copes with a file
+    # that has no match at all.
+    return serials[1:]
+
+
+def _module_change_rows(bus, ordered_dates, file_serials, num_mods):
+    '''
+    Builds the (bus, module, date, change) rows for one bus.
+
+    The first file of the bus always gets a change of 0. Every later file
+    gets 1 when the module's serial differs from the previous file and 0
+    otherwise.
+
+    Parameters:
+    - bus (str): Name of the bus folder, used as the 'Bus' value.
+    - ordered_dates (list of str): One formatted date per CSV file.
+    - file_serials (list of list of str): Module serials per CSV file, in
+      the same order as ordered_dates.
+    - num_mods (int): Number of modules to report for every file.
+
+    Returns:
+    - list of tuple: num_mods * len(file_serials) rows, grouped by module
+      and ordered by file within each module.
+    '''
+    rows = []
+    for mod in range(num_mods):
+        # The change is recorded for the module whose own serial was
+        # compared. This keeps exactly one entry per module and file, so
+        # every row lines up with its date.
+        previous = None
+        for position, serials in enumerate(file_serials):
+            # A missing serial is None, so a module that appears or
+            # disappears between two files counts as a change
+            current = serials[mod] if mod < len(serials) else None
+            changed = int(position > 0 and current != previous)
+            rows.append((bus,
+                         str(mod + 1),
+                         ordered_dates[position],
+                         changed))
+            previous = current
+    return rows
\ No newline at end of file