Skip to content

Commit

Permalink
Updated vis
Browse files Browse the repository at this point in the history
  • Loading branch information
pvtr1998 committed Jun 5, 2024
1 parent 26f0c2d commit 4f25d82
Showing 1 changed file with 131 additions and 1 deletion.
132 changes: 131 additions & 1 deletion alfred/vis.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import re
from os import listdir

from .etl import find_directory, sort_bus_by_date
from etl import find_directory, sort_bus_by_date

# def find_directory():
# '''
Expand Down Expand Up @@ -680,3 +680,133 @@ def swapped_mod_dataframes(directory, serial_num, characteristic):
index.name = title
list_desired_dfs.append(df_characteristic)
return list_desired_dfs

def _read_module_serials(csv_path, keyword):
    '''
    Collects the normalized serial strings found in a single CSV file.
    Every cell containing `keyword` contributes one entry; the first entry
    found is then discarded.
    Parameters:
    - csv_path (str): Path of the CSV file to scan.
    - keyword (str): Text that marks a cell as holding a serial number.
    Returns:
    - list of str: Lower-cased serials stripped of non-word characters,
      in file order, without the first match.
    Raises:
    - IndexError: If the file contains no cell with `keyword`.
    '''
    serials = []
    # newline='' is what the csv module asks for so that newlines embedded
    # in quoted fields are parsed correctly.
    with open(csv_path, newline='') as csv_file:
        for row in csv.reader(csv_file):
            for cell in row:
                if keyword in cell:
                    # Offset 17 presumably skips the 16-character keyword
                    # plus one separator; assumes the cell starts with the
                    # keyword -- TODO confirm against the CSV layout.
                    serials.append(re.sub(r'\W+', '', cell[17:]).lower())
    # The first match is not treated as a module entry and is dropped.
    serials.pop(0)
    return serials


def label_bad_module(directory):
    '''
    Counts changes in modules sequentially for each bus over all CSV files
    for all buses.
    Outputs a DataFrame that is used for heatmap visualizations.
    Parameters:
    - directory (str): The directory where the files sorted by bus can be
      found. It is concatenated directly with the bus folder name, so it
      must end with a path separator.
    Returns:
    - pandas.DataFrame: One row per (bus, module, file date) with columns
      'Bus', 'Module', 'Date' and 'Change' (1 if the serial differed from
      the next file in chronological order, else 0). An empty DataFrame
      with these columns is returned when no bus data is found.
    '''
    keyword = 'Mfg Data (ASCII)'  # Keyword to search for
    column_names = ['Bus', 'Module', 'Date', 'Change']
    num_mods = 16  # Constant number of mods
    mod_names = ['Module ' + str(i) for i in range(1, num_mods + 1)]
    list_df = []  # List of dataframes, one per bus

    # Bus folders are the directory entries whose name starts with 'bus'.
    bus_folders = [entry for entry in listdir(directory)
                   if entry.startswith('bus')]

    for bus_key in bus_folders:
        df_sorted = sort_bus_by_date(directory, bus_key + '/')
        ordered_csvs = df_sorted['Filename'].tolist()
        # 'DateRetrieved' values must support strftime
        # (presumably pandas Timestamps -- verify against sort_bus_by_date).
        ordered_dates = [
            retrieved.strftime('%m/%d/%Y, %H:%M:%S')
            for retrieved in df_sorted['DateRetrieved'].tolist()
        ]
        if not ordered_csvs:
            # A bus without CSV files has nothing to compare; building its
            # frame would fail because the columns would differ in length.
            continue

        # Serial numbers per file, in the same chronological order as
        # ordered_csvs.
        serials_per_file = [
            _read_module_serials(directory + bus_key + '/' + csv_name, keyword)
            for csv_name in ordered_csvs
        ]

        # Every module starts with a single 0 for the first file; one
        # value is appended for each consecutive pair of files compared.
        mod_change_count = {mod_name: [0] for mod_name in mod_names}
        for first_mods, next_mods in zip(serials_per_file,
                                         serials_per_file[1:]):
            for j, serial in enumerate(first_mods):
                # The change is recorded against the *previous* module.
                # NOTE(review): j == 0 and j == 1 both map to "Module 1",
                # and "Module N" with N == len(first_mods) never receives
                # an entry, so per-module lists can differ in length and
                # the flattened 'Change' column may not line up with the
                # 'Module'/'Date' labels -- confirm this is intended.
                prev_m_str = 'Module ' + str(j) if j > 0 else 'Module 1'
                mod_change_count[prev_m_str].append(
                    int(serial != next_mods[j]))

        # Flatten to long format: all rows of module 1, then module 2, ...
        num_files = len(ordered_csvs)
        module_labels = []
        change_labels = []
        for mod_number, mod_name in enumerate(mod_names, start=1):
            change_labels += mod_change_count[mod_name]
            module_labels += [str(mod_number)] * num_files

        data_lists = [[bus_key] * (num_files * num_mods),
                      module_labels,
                      ordered_dates * num_mods,
                      change_labels]
        df_bus_changes = pd.DataFrame(
            data=dict(zip(column_names, data_lists)))
        list_df.append(df_bus_changes)

    if not list_df:
        # pd.concat raises ValueError on an empty list; return an empty
        # frame with the expected columns instead.
        return pd.DataFrame(columns=column_names)
    return pd.concat(list_df, axis=0)

0 comments on commit 4f25d82

Please sign in to comment.