# Recovered from the whitespace-collapsed patch 4f25d82 ("Updated vis"), which
# appends this function to alfred/vis.py and switches that module's import to
# `from etl import find_directory, sort_bus_by_date`.
# The code expects csv, re, pd, listdir and sort_bus_by_date to be available
# at module level in alfred/vis.py.


def _read_module_serials(csv_path, keyword):
    '''
    Collects the cleaned serial strings found in one CSV file.

    Parameters:
    - csv_path (str): Path of the CSV file to scan.
    - keyword (str): Text marking the cells that carry a serial number.

    Returns:
    - list of str: One entry per cell containing the keyword, in file order,
      with the first match discarded.

    Raises:
    - IndexError: If no cell in the file contains the keyword.
    '''
    serials = []
    # newline='' is how the csv module documents opening files for csv.reader.
    with open(csv_path, newline='') as csv_file:
        for row in csv.reader(csv_file):
            for cell in row:
                if keyword in cell:
                    # Drop the first 17 characters of the cell (presumably
                    # the keyword plus a separator -- confirm against the
                    # CSV layout), strip non-word characters, lower-case.
                    serials.append(re.sub(r'\W+', '', cell[17:]).lower())
    # The first match in a file is not treated as a module serial.
    serials.pop(0)
    return serials


def label_bad_module(directory):
    '''
    Flags serial-number changes between consecutive CSV files of every bus.
    Outputs a DataFrame that is used for heatmap visualizations.

    Every entry of the directory whose name starts with 'bus' is treated as
    a bus folder. Its CSV files are taken in the order returned by
    sort_bus_by_date, and each file is compared with the one that follows it.

    Parameters:
    - directory (str): The directory where the files sorted by bus can be
      found. It is concatenated directly with the bus folder name, so it
      must end with a path separator.

    Returns:
    - pandas.DataFrame: Columns 'Bus', 'Module', 'Date' and 'Change'.
      'Bus' is the folder name, 'Module' is the module number as a string
      ('1' to '16'), 'Date' is formatted as '%m/%d/%Y, %H:%M:%S' and
      'Change' is 0 or 1. The frames of all buses are concatenated without
      resetting the index. An empty frame with the same columns is returned
      when there are no bus folders or none of them contains a CSV file.
    '''
    keyword = 'Mfg Data (ASCII)'  # Keyword to search for
    column_names = ['Bus', 'Module', 'Date', 'Change']
    num_mods = 16  # Constant number of mods
    mod_index = ['Module ' + str(i) for i in range(1, num_mods + 1)]

    bus_folders = [entry for entry in listdir(directory)
                   if entry.startswith('bus')]

    list_df = []  # One dataframe per bus
    for bus in bus_folders:
        df = sort_bus_by_date(directory, bus + '/')
        ordered_csvs = df['Filename'].tolist()
        ordered_dates = [stamp.strftime('%m/%d/%Y, %H:%M:%S')
                         for stamp in df['DateRetrieved'].tolist()]
        if not ordered_csvs:
            # Nothing to compare and no dates to label: the bus adds no rows.
            continue

        # Serial numbers of every file, in the same order as ordered_csvs
        serials_per_file = [
            _read_module_serials(directory + bus + '/' + csv_name, keyword)
            for csv_name in ordered_csvs
        ]

        # Every module starts with a 0 entry standing for the first file.
        mod_change_count = {mod_name: [0] for mod_name in mod_index}
        for first_mods, next_mods in zip(serials_per_file,
                                         serials_per_file[1:]):
            for j, serial in enumerate(first_mods):
                # The result for position j is recorded under the previous
                # module; position 0 has none and is recorded on Module 1.
                # NOTE(review): with 16 serials per file, Module 1 receives
                # two entries per comparison and Module 16 none, so the
                # per-module lists differ in length while the label columns
                # below assume one entry per file. Behaviour is kept as
                # written -- confirm the intended attribution.
                prev_m_str = 'Module ' + str(j) if j > 0 else 'Module 1'
                mod_change_count[prev_m_str].append(
                    int(serial != next_mods[j]))

        # Flatten into columns: all rows of Module 1, then Module 2, ...
        num_files = len(ordered_csvs)
        module_labels = []
        change_labels = []
        for mod_name in mod_index:
            change_labels += mod_change_count[mod_name]
            # 'Module 7' -> '7', 'Module 12' -> '12'
            module_labels += [mod_name.split()[-1]] * num_files
        data_lists = [[bus] * (num_files * num_mods),
                      module_labels,
                      ordered_dates * num_mods,
                      change_labels]
        list_df.append(pd.DataFrame(data=dict(zip(column_names, data_lists))))

    if not list_df:
        # pd.concat raises ValueError on an empty list.
        return pd.DataFrame(columns=column_names)
    return pd.concat(list_df, axis=0)