Skip to content

Commit

Permalink
Merge branch 'main' of github.com:eletoups/TheBattmobile
Browse files Browse the repository at this point in the history
  • Loading branch information
pvtr1998 committed Jun 5, 2024
2 parents 4f25d82 + 8ecbc7a commit c8a7b05
Show file tree
Hide file tree
Showing 10 changed files with 489 additions and 131 deletions.
47 changes: 23 additions & 24 deletions .github/workflows/python-package-conda.yml
Original file line number Diff line number Diff line change
@@ -1,33 +1,32 @@
name: Python Package using Conda
name: Python application

on: [push]
on: [push, pull_request]

jobs:
build-linux:
build:

runs-on: ubuntu-latest
strategy:
max-parallel: 5

steps:
- uses: actions/checkout@v3
- name: Set up Python 3.11
uses: actions/setup-python@v3
- name: Checkout code
uses: actions/checkout@v2

- name: Set up Miniconda
uses: conda-incubator/setup-miniconda@v2
with:
python-version: '3.11'
- name: Add conda to system path
run: |
# $CONDA is an environment variable pointing to the root of the miniconda directory
echo $CONDA/bin >> $GITHUB_PATH
- name: Install dependencies
run: |
conda env update --file environment.yml --name base
- name: Lint with flake8
auto-update-conda: true
python-version: 3.8 # Use the same Python version as in your environment.yml

- name: Remove existing environment
run: |
conda install flake8
# stop the build if there are Python syntax errors or undefined names
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics --exit-zero
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics --exit-zero
- name: Test with unittest
conda env remove -n alfred || echo "Environment alfred does not exist"
- name: Create environment
run: conda env create -f environment.yml

- name: Update environment
run: conda env update --file environment.yml --prune

- name: Run tests
run: |
python -m unittest discover
conda run -n alfred python -m unittest discover
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ __pycache__/
*.so

# no .csvs
*.csv
#*.csv

# Distribution / packaging
.Python
Expand Down
Binary file added alfred/data/KCM-Raw-Data.zip
Binary file not shown.
2 changes: 1 addition & 1 deletion alfred/etl.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ def group_files(directory):
count += 1
else:
moved_files.append(filename)
incomplete = os.path.join(directory, 'incomplete_test')
incomplete = os.path.join(directory, 'incomplete')
if not os.path.exists(incomplete):
os.makedirs(incomplete)
destination = os.path.join(incomplete, filename)
Expand Down
257 changes: 232 additions & 25 deletions alfred/tests/test_etl.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import unittest
import zipfile
import pandas as pd
import numpy as np
import shutil
import alfred

Expand All @@ -20,7 +21,7 @@ def setUp(self):
zip_ref.extractall(self.temp_dir)

def test_find_directory_ends_with_forward_slash(self):
directory = alfred.etl.find_directory()
directory = alfred.find_directory()
self.assertTrue(directory.endswith('/'), "Directory does not end with a forward slash")

def tearDown(self):
Expand Down Expand Up @@ -50,7 +51,7 @@ def test_grab_csv(self):
Testing the grab_csv function.
"""
# find the csvs extracted from the zip
result = alfred.etl.grab_csv(self.temp_dir)
result = alfred.grab_csv(self.temp_dir)
expected_result = [
'14F0154_ProfileData_20171116125343(2).csv',
'14B0059_ProfileData_20180403061303(2).csv',
Expand All @@ -73,7 +74,7 @@ def test_grab_csv(self):
self.assertListEqual(result, expected_result)

# this should return an empty list
result = alfred.etl.grab_csv(data_path)
result = alfred.grab_csv(data_path)
self.assertListEqual(result, [])

def tearDown(self):
Expand All @@ -86,36 +87,242 @@ def tearDown(self):
os.remove(file_path)
os.rmdir(self.temp_dir)

from alfred.etl import group_files



class TestGroupFiles(unittest.TestCase):
    """Check the folders and files found under the unsorted fixture after
    ``alfred.group_files`` has been run on it.
    """

    def setUp(self):
        self.valid_directory = os.path.join(data_path, 'unsorted_test_data')

    def _check_grouped_subdir(self, subdir_name, expected_files=()):
        """Assert *subdir_name* is a directory containing every expected file."""
        subdir_path = os.path.join(self.valid_directory, subdir_name)
        self.assertTrue(os.path.exists(subdir_path))
        self.assertTrue(os.path.isdir(subdir_path))
        for file_name in expected_files:
            file_path = os.path.join(subdir_path, file_name)
            self.assertTrue(os.path.exists(file_path))
            self.assertTrue(os.path.isfile(file_path))

    def test_group_file_creates_subdirectories(self):
        alfred.group_files(self.valid_directory)
        # Only the presence of each subdirectory is checked here.
        for subdir_name in (
            "bus_0039141908140233500710",
            "bus_0039142006m40233500710",
            "bus_0039142903b40233500710",
            "bus_0039151006940233500710",
            "incomplete",
        ):
            self._check_grouped_subdir(subdir_name)

    def test_bus_0039141908140233500710(self):
        alfred.group_files(self.valid_directory)
        self._check_grouped_subdir(
            "bus_0039141908140233500710",
            ['14F0154_ProfileData_20171116125343(2).csv'],
        )

    def test_bus_0039142006m40233500710(self):
        alfred.group_files(self.valid_directory)
        self._check_grouped_subdir(
            "bus_0039142006m40233500710",
            ['14B0059_ProfileData_20180403061303(2).csv'],
        )

    def test_bus_0039142903b40233500710(self):
        alfred.group_files(self.valid_directory)
        self._check_grouped_subdir(
            "bus_0039142903b40233500710",
            [
                '14H0221_ProfileData_20171005100537(3).csv',
                '14H0221_ProfileData_20171027081821(2).csv',
                '14H0221_ProfileData_20180403084955(2).csv',
            ],
        )

    def test_bus_0039151006940233500710(self):
        alfred.group_files(self.valid_directory)
        self._check_grouped_subdir(
            "bus_0039151006940233500710",
            [
                '0014_ProfileData_20160727062458(2).csv',
                '0014_ProfileData_20161004063626(2).csv',
                '0014_ProfileData_20161201100821(2).csv',
                '0014_ProfileData_20170207092734(2).csv',
                '13J0014_ProfileData_20180801084738(2).csv',
                '13J0014_ProfileData_20180802094539(2).csv',
            ],
        )

    def test_incomplete(self):
        alfred.group_files(self.valid_directory)
        self._check_grouped_subdir(
            "incomplete",
            [
                '_ProfileData_20180425073946(2).csv',
                '_ProfileData_20180508065837(2).csv',
            ],
        )

    def tearDown(self):
        # No cleanup is performed after each test.
        pass


# Tests for alfred.count_bus_file using the 'unsorted_test_data' fixture under data_path.
# NOTE(review): this text is a rendered diff with indentation stripped. The temp-dir
# setup and the test_group_files lines below look like lines from the previous
# revision interleaved with the new ones -- confirm against the real file.
class TestCountBusFile(unittest.TestCase):
def setUp(self):
# Create a temporary directory
self.test_dir = tempfile.mkdtemp()
self.test_data_dir = os.path.join(self.test_dir, 'alfred/unsorted_test_data')
os.makedirs(self.test_data_dir)

# Add test CSV file
self.test_file_name = '14F0154_ProfileData_20171116125343(2).csv'
self.test_file_path = os.path.join(self.test_data_dir, self.test_file_name)
with open(self.test_file_path, 'w') as f:
f.write('test data')

def test_group_files(self):
# Call the group_files function
group_files(self.test_data_dir)
# Fixture directory read by the count_bus_file tests below.
self.valid_directory = os.path.join(data_path, 'unsorted_test_data')

def test_count_bus_file(self):
# The fixture is expected to yield a count of exactly 4.
count = alfred.count_bus_file(self.valid_directory)
self.assertEqual(count, 4)

def test_count_bus_file_returns_integer(self):
# The return value must be a plain int.
count = alfred.count_bus_file(self.valid_directory)
self.assertIsInstance(count, int)

def tearDown(self):
pass

# Tests for alfred.sort_bus_by_date on one bus folder of the 'unsorted_test_data/' fixture.
class TestSortBusByDate(unittest.TestCase):
def setUp(self):
self.valid_directory = os.path.join(data_path, 'unsorted_test_data/')

# NOTE(review): the next three lines reference self.test_data_dir and
# self.test_file_name, which this class never sets; they look like removed
# lines from the previous revision of the diff -- confirm against the real file.
# Check if the expected folder and file are created
expected_dir = os.path.join(self.test_data_dir, 'bus_0039141908140233500710')
expected_file = os.path.join(expected_dir, self.test_file_name)
def test_sort_data_by_date_returns_dataframe(self):
# The result must be a pandas DataFrame.
bus_num = 'bus_0039151006940233500710/'
df = alfred.sort_bus_by_date(self.valid_directory, bus_num)
self.assertIsInstance(df, pd.DataFrame)


def test_sort_data_by_date_dataframe_shape(self):
# Expect one row per .csv file in the bus folder and exactly two columns.
bus_num = 'bus_0039151006940233500710/'
bus_directory = os.path.join(self.valid_directory, bus_num)
csv_files = [f for f in os.listdir(bus_directory) if f.endswith('.csv')]
expected_rows = len(csv_files)
df = alfred.sort_bus_by_date(self.valid_directory, bus_num)
self.assertEqual(df.shape[0], expected_rows)
self.assertEqual(df.shape[1], 2)

def tearDown(self):
pass

self.assertTrue(os.path.isdir(expected_dir), f"Expected directory {expected_dir} does not exist.")
self.assertTrue(os.path.isfile(expected_file), f"Expected file {expected_file} does not exist.")
# Tests for alfred.compare_file_mods against two fixture directories under data_path.
class TestCompareFileMods(unittest.TestCase):
def setUp(self):
self.valid_directory = os.path.join(data_path, 'unsorted_test_data/')
self.invalid_directory = os.path.join(data_path, 'unzipped_test_data/')

def test_compare_file_mods_returns_dictionary(self):
# The result must be a dict.
result = alfred.compare_file_mods(self.valid_directory)
self.assertIsInstance(result, dict)

def test_compare_file_mods_dictionary_length(self):
# Expect one entry per subdirectory of the fixture, minus one.
result = alfred.compare_file_mods(self.valid_directory)
subdirs = [d for d in os.listdir(self.valid_directory) if os.path.isdir(os.path.join(self.valid_directory, d))]
self.assertEqual(len(result), len(subdirs)-1) # -1 to account for the incomplete subdirectory

# NOTE(review): the method name says "unsorted_test_data", but the directory passed
# is self.invalid_directory, i.e. 'unzipped_test_data/'.
def test_compare_file_mods_unsorted_test_data_returns_empty_dict(self):
result = alfred.compare_file_mods(self.invalid_directory)
self.assertEqual(result, {})

def tearDown(self):
# NOTE(review): self.test_dir is never set in this class; the rmtree line below
# looks like a removed line from the previous revision -- confirm against the real file.
# Remove the temporary directory after the test
shutil.rmtree(self.test_dir)
pass

class TestFilterFalseMod(unittest.TestCase):
    """Exercise ``alfred.filter_false_module`` on the unsorted fixture directory."""

    def setUp(self):
        self.valid_directory = os.path.join(data_path, 'unsorted_test_data/')

    def test_filter_false_module_returns_numpy_array(self):
        # The function is expected to hand back a NumPy array.
        filtered = alfred.filter_false_module(self.valid_directory)
        self.assertIsInstance(filtered, np.ndarray)

    def test_filter_false_module_contains_expected_entries(self):
        # Both of these bus folder names must appear in the returned array.
        filtered = alfred.filter_false_module(self.valid_directory)
        for bus_name in (
            'bus_0039142903b40233500710',
            'bus_0039151006940233500710',
        ):
            self.assertIn(bus_name, filtered)

    def tearDown(self):
        # No cleanup is performed after each test.
        pass

class TestMoveFalseBus(unittest.TestCase):
    """Check the ``vis_buses`` folder found under the sorted fixture after
    ``alfred.move_false_bus`` has been run on it.
    """

    def setUp(self):
        self.valid_directory = os.path.join(data_path, 'sorted_test_data/')

    def _move_and_locate_vis_buses(self):
        """Run ``move_false_bus`` on the fixture and return the 'vis_buses' path."""
        alfred.move_false_bus(self.valid_directory)
        return os.path.join(self.valid_directory, 'vis_buses')

    def test_move_false_bus_creates_vis_buses_directory(self):
        # 'vis_buses' must exist and be a directory after the call.
        target = self._move_and_locate_vis_buses()
        self.assertTrue(os.path.exists(target))
        self.assertTrue(os.path.isdir(target))

    def test_move_false_bus_moves_correct_directories(self):
        # Both named bus folders must be present inside 'vis_buses'.
        target = self._move_and_locate_vis_buses()
        for bus_name in (
            'bus_0039142903b40233500710',
            'bus_0039151006940233500710',
        ):
            moved_path = os.path.join(target, bus_name)
            self.assertTrue(os.path.exists(moved_path))
            self.assertTrue(os.path.isdir(moved_path))

    def test_move_false_bus_no_other_directories_in_vis_buses(self):
        # Every directory inside 'vis_buses' must be one of the two named bus folders.
        target = self._move_and_locate_vis_buses()
        allowed = ['bus_0039142903b40233500710', 'bus_0039151006940233500710']
        present_dirs = [
            entry
            for entry in os.listdir(target)
            if os.path.isdir(os.path.join(target, entry))
        ]
        for entry in present_dirs:
            self.assertIn(entry, allowed)

    def tearDown(self):
        # No cleanup is performed after each test.
        pass

# class TestUnpackInteractive(unittest.TestCase):
# def setUp(self):
# self.valid_directory = os.path.join(data_path, 'sorted_test_data/')
# raw_data_folder_name = 'alfred/' + 'data/'
# zip_filename = 'test_data.zip'

# def test_unpack_interactive_creates_sorted_data_directory(self):
# alfred.etl.unpack_interactive()
# # Check if 'sorted_data' directory is created
# sorted_data_dir = os.path.join(self.valid_directory, 'sorted_data')
# self.assertTrue(os.path.exists(sorted_data_dir))
# self.assertTrue(os.path.isdir(sorted_data_dir))

# def test_unpack_interactive_creates_all_buses_directory(self):
# alfred.etl.unpack_interactive()
# # Check if 'all_buses' directory is created
# all_buses_dir = os.path.join(self.valid_directory, 'all_buses')
# self.assertTrue(os.path.exists(all_buses_dir))
# self.assertTrue(os.path.isdir(all_buses_dir))
# def tearDown(self):
# pass

# Run the unittest runner when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
Loading

0 comments on commit c8a7b05

Please sign in to comment.