-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathvendor.py
64 lines (55 loc) · 2.53 KB
/
vendor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import csv
from collections import defaultdict
import re
# Read in the final.csv file and store the data in a dictionary
final_data = defaultdict(list)
with open('final.csv', 'r') as f:
reader = csv.DictReader(f)
for row in reader:
final_data[row['Description']].append(row)
# Helper function to extract the account number from a description
def extract_account_number(description):
match = re.search(r'XXXXXX(\d+)', description)
if match:
return match.group(1)
return None
# Read in the current.csv file and generate the output.csv file
with open('current.csv', 'r') as f, open('output.csv', 'w', newline='') as out_file:
reader = csv.DictReader(f)
writer = csv.DictWriter(out_file, fieldnames=['Date', 'Vendor', 'Description', 'Category', 'Amount'])
writer.writeheader()
for row in reader:
# Find the final.csv entry that matches the most number of words
# in the current.csv description
description_words = row['Description'].lower().split()
matching_entry = None
for desc, entries in final_data.items():
desc_words = desc.lower().split()
num_matches = sum(1 for word in description_words if word in desc_words)
if matching_entry is None or num_matches > matching_entry['num_matches']:
matching_entry = {'num_matches': num_matches, 'entry': entries[0]}
# Use the matching final.csv entry to populate the output.csv row
if matching_entry['num_matches'] > 0:
output_row = {
'Date': row['Date'],
'Description': row['Description'],
'Amount': row['Amount'],
}
# If the vendor starts with "ONLINE TRANSFER", extract the account number
vendor = matching_entry['entry']['Vendor']
if vendor.startswith('ONLINE TRANSFER'):
account_number = extract_account_number(row['Description'])
if account_number is not None:
vendor = f"ONLINE TRANSFER {account_number}"
output_row['Vendor'] = vendor
output_row['Category'] = matching_entry['entry']['Category']
else:
# If there is no matching entry, use "Ask My Accountant" as a placeholder
output_row = {
'Date': row['Date'],
'Vendor': 'UNKNOWN',
'Description': row['Description'],
'Category': 'Ask My Accountant',
'Amount': row['Amount'],
}
writer.writerow(output_row)