Skip to content

Commit

Permalink
Add parsing categories from file
Browse files Browse the repository at this point in the history
Prepopulate categories file with basic categories.

NOTE: Users are expected to modify categories.json according to
their own needs (to get the best experience from this app).
  • Loading branch information
WojtekMs committed Dec 5, 2023
1 parent 3377479 commit 9aeec7d
Show file tree
Hide file tree
Showing 12 changed files with 470 additions and 14 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ __pycache__
.idea
unmatched_transactions.html
*.xlsx
output
4 changes: 3 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,9 @@ dependencies = [
"html5lib",
"beautifulsoup4",
"py-moneyed",
"openpyxl"
"openpyxl",
"semver",
"importlib-resources"
]

[project.optional-dependencies] # Optional
Expand Down
54 changes: 41 additions & 13 deletions src/banker/__main__.py
Original file line number Diff line number Diff line change
@@ -1,30 +1,58 @@
from banker.analyzer.analyze import analyze_transactions, deduce_month_year
from banker.data.category import Category, PaymentType
import argparse
import os.path

from importlib_resources import files

from banker.analyzer.analyze import analyze_transactions, deduce_month_year
from banker.data.category import Category

from banker.data.transaction import Transaction
from banker.formatter.month_year_formatter import format_month_year
from banker.parser.html_transactions_parser import HtmlTransactionsParser
from banker.formatter.html_transactions_formatter import HtmlTransactionsFormatter
from banker.parser.interfaces.categories_parser import ICategoriesParser
from banker.parser.interfaces.transactions_parser import ITransactionsParser
from banker.parser.json_categories_parser import JsonCategoriesParser
from banker.writer.excel_categories_writer import ExcelCategoriesWriter


def get_supported_categories(categories_parser: ICategoriesParser, categories_filepath: str) -> list[Category]:
    """Load the supported categories from a categories file.

    Args:
        categories_parser: Parser that turns the file content into categories.
        categories_filepath: Path of the categories file to read.

    Returns:
        Whatever ``categories_parser.parse_categories`` produces for the file content.
    """
    # Decode explicitly as UTF-8: category names may contain non-ASCII characters,
    # so the result must not depend on the platform's locale encoding.
    with open(categories_filepath, "r", encoding="utf-8") as file:
        return categories_parser.parse_categories(file.read())


def get_transactions(transactions_parser: ITransactionsParser, transactions_filepath: str) -> list[Transaction]:
    """Load all transactions from a transactions file.

    Args:
        transactions_parser: Parser that turns the file content into transactions.
        transactions_filepath: Path of the transactions file to read.

    Returns:
        Whatever ``transactions_parser.parse_transactions`` produces for the file content.
    """
    # The transactions file is treated as UTF-8 text; spell the encoding out so the
    # decoding does not silently fall back to the platform's locale encoding.
    with open(transactions_filepath, "r", encoding="utf-8") as transactions_file:
        return transactions_parser.parse_transactions(transactions_file.read())


def save_to_file(filepath: str, content: str):
    """Write ``content`` to ``filepath``, replacing any existing file.

    Args:
        filepath: Destination path; the parent directory must already exist.
        content: Text to store.
    """
    # Encode explicitly as UTF-8 so non-ASCII text is written identically on every
    # platform instead of depending on the locale encoding.
    with open(filepath, "w", encoding="utf-8") as file:
        file.write(content)


def main():
# Command-line entry point: parses the transactions file, matches the transactions against the
# supported categories and writes the unmatched transactions and the matched categories to files.
# NOTE(review): indentation and +/- diff markers are missing in this view, so statements that look
# superseded by later ones are flagged below as presumably removed — confirm against the repository.

# NOTE(review): hard-coded category list; presumably removed in favour of the categories file
# loaded further down.
supported_categories = [
Category(name="Kaufland", payment_type=PaymentType.Household, matching_regexes=[r"KAUFLAND PL"])]
transactions_parser = HtmlTransactionsParser()
categories_parser = JsonCategoriesParser()
transactions_formatter = HtmlTransactionsFormatter()
categories_writer = ExcelCategoriesWriter()

# Command line: one positional transactions file plus two optional paths, both of which
# default to locations inside the banker.resources package.
parser = argparse.ArgumentParser()
parser.add_argument("html_file")
parser.add_argument("--categories_file", default=files('banker.resources').joinpath('categories.json'))
parser.add_argument("--output_directory", default=files('banker.resources').joinpath('output'))
args = parser.parse_args()

# NOTE(review): the statements from here up to the os.makedirs call read the input as bytes decoded
# as UTF-8 and write both result files using bare relative file names; presumably the removed flow.
with open(args.html_file, "rb") as input_file:
all_transactions = transactions_parser.parse_transactions(input_file.read().decode('utf-8'))
analyze_result = analyze_transactions(all_transactions, supported_categories)
formatted_transactions = transactions_formatter.format_transactions(analyze_result.unmatched_transactions)
with open("unmatched_transactions.html", "w") as transactions_file:
transactions_file.write(formatted_transactions)
month_year = deduce_month_year(all_transactions)
categories_writer.write_categories(analyze_result.matched_categories, "autogen_budget.xlsx",
format_month_year(month_year))
# Create the output directory if it does not exist yet; both result files are placed inside it.
os.makedirs(args.output_directory, exist_ok=True)
output_unmatched_transactions_filepath = os.path.join(args.output_directory, "unmatched_transactions.html")
output_matched_categories_filepath = os.path.join(args.output_directory, "autogen_budget.xlsx")

# Load the transactions and the categories, then analyze the transactions against the categories.
all_transactions = get_transactions(transactions_parser, args.html_file)
month_year = deduce_month_year(all_transactions)
supported_categories = get_supported_categories(categories_parser, args.categories_file)
analyze_result = analyze_transactions(all_transactions, supported_categories)
formatted_transactions = transactions_formatter.format_transactions(analyze_result.unmatched_transactions)

# Persist the formatted unmatched transactions and hand the matched categories to the writer,
# together with the formatted month/year deduced from the transactions.
save_to_file(output_unmatched_transactions_filepath, formatted_transactions)
categories_writer.write_categories(analyze_result.matched_categories, output_matched_categories_filepath,
format_month_year(month_year))
5 changes: 5 additions & 0 deletions src/banker/common/naming.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,8 @@
TRANSACTION_COL_NAME_DESCRIPTION = "Opis"
TRANSACTION_COL_NAME_VALUE = "Kwota"

# Key names used in the categories JSON document.
# Top-level keys:
CATEGORIES_KEY_NAME_VERSION = "version"
CATEGORIES_KEY_NAME_CATEGORIES = "categories"
# Keys of each category object:
CATEGORIES_KEY_NAME_CATEGORY_NAME = "name"
CATEGORIES_KEY_NAME_CATEGORY_PAYMENT_TYPE = "payment_type"
CATEGORIES_KEY_NAME_CATEGORY_REGEXES = "matching_regexes"
9 changes: 9 additions & 0 deletions src/banker/parser/interfaces/categories_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from abc import ABC, abstractmethod

from banker.data.category import Category


class ICategoriesParser(ABC):
    """Interface for parsers that turn textual content into categories."""

    @abstractmethod
    def parse_categories(self, content: str) -> list[Category]:
        """Parse ``content`` and return the categories it describes.

        Concrete parsers must override this method.
        """
        raise NotImplementedError("Method not implemented in subclass")
90 changes: 90 additions & 0 deletions src/banker/parser/json_categories_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
import logging
import semver
import json

from banker.common.naming import CATEGORIES_KEY_NAME_VERSION, CATEGORIES_KEY_NAME_CATEGORIES, \
CATEGORIES_KEY_NAME_CATEGORY_NAME, CATEGORIES_KEY_NAME_CATEGORY_PAYMENT_TYPE, CATEGORIES_KEY_NAME_CATEGORY_REGEXES
from banker.data.category import Category
from banker.parser.interfaces.categories_parser import ICategoriesParser
from banker.parser.payment_type_parser import parse_payment_type


class CategoriesVersionMissing(Exception):
    """Raised when the categories JSON document carries no version key."""

    def __str__(self):
        message = f"Key {CATEGORIES_KEY_NAME_VERSION} is missing in categories JSON file"
        return message


class CategoriesVersionInvalid(Exception):
    """Raised when the categories version is not a semantic version string."""

    def __init__(self, version: str):
        # Keep the offending value so it can be shown in the message.
        self.__version = version

    def __str__(self):
        return (
            f"Categories version has invalid format, "
            f"expected semantic versioning e.g: 1.0.0, actual: {self.__version}"
        )


class CategoriesVersionUnsupported(Exception):
    """Raised when the categories version is not supported by the application."""

    def __init__(self, supported_version: semver.Version, current_version: semver.Version):
        # Both versions are kept so the message can show them side by side.
        self.__supported_version = supported_version
        self.__current_version = current_version

    def __str__(self):
        return (
            f"Categories version is unsupported by application, "
            f"supported version: {self.__supported_version}, current version: {self.__current_version}"
        )


class CategoryNameDuplicate(Exception):
    """Raised when the same category name is used more than once."""

    def __init__(self, name: str):
        # Keep the duplicated name so it can be shown in the message.
        self.__name = name

    def __str__(self):
        message = f"Categories names must be unique, but this category name is used multiple times: {self.__name}"
        return message


class JsonCategoriesParser(ICategoriesParser):
    """Parses categories from a versioned JSON document.

    The document must carry a semantic version under the version key and may carry
    a list of category objects under the categories key. Category objects that lack
    a required key or use an unknown payment type are skipped with a warning;
    a repeated category name is an error.
    """

    def __init__(self):
        self.__supported_version = semver.Version(major=1, minor=0, patch=0)
        self.__logger = logging.getLogger("JsonCategoriesParser")

    def __validate_version(self, json_dict: dict):
        """Check that the document version exists, is well formed and is supported.

        Raises:
            CategoriesVersionMissing: The version key is absent.
            CategoriesVersionInvalid: The version is not a semantic version string.
            CategoriesVersionUnsupported: The version is not compatible with the
                version supported by this parser.
        """
        version = json_dict.get(CATEGORIES_KEY_NAME_VERSION)
        if version is None:
            raise CategoriesVersionMissing()
        # Non-string values (e.g. a JSON number) are not semantic version strings;
        # reject them here instead of relying on semver to cope with the type.
        if not isinstance(version, str) or not semver.Version.is_valid(version):
            raise CategoriesVersionInvalid(version)
        version = semver.Version.parse(version)
        if not self.__supported_version.is_compatible(version):
            raise CategoriesVersionUnsupported(self.__supported_version, version)

    def __contains_required_keys(self, category: dict) -> bool:
        """Return True when ``category`` has every required key, logging the first missing one."""
        required_keys = (CATEGORIES_KEY_NAME_CATEGORY_NAME, CATEGORIES_KEY_NAME_CATEGORY_PAYMENT_TYPE,
                         CATEGORIES_KEY_NAME_CATEGORY_REGEXES)
        for required_key in required_keys:
            if required_key not in category:
                # Warning rather than info: the category is dropped from the result.
                self.__logger.warning("Skipping category %r, key missing: %s", category, required_key)
                return False
        return True

    def parse_categories(self, content: str) -> list[Category]:
        """Parse the categories JSON document in ``content``.

        Args:
            content: Text of the categories JSON document.

        Returns:
            The valid categories, in document order.

        Raises:
            CategoriesVersionMissing, CategoriesVersionInvalid,
            CategoriesVersionUnsupported: See the version validation.
            CategoryNameDuplicate: Two valid categories share a name.
        """
        json_dict = json.loads(content)
        self.__validate_version(json_dict)

        result = {}
        for category in json_dict.get(CATEGORIES_KEY_NAME_CATEGORIES, []):
            if not self.__contains_required_keys(category):
                continue
            name = category[CATEGORIES_KEY_NAME_CATEGORY_NAME]
            raw_payment_type = category[CATEGORIES_KEY_NAME_CATEGORY_PAYMENT_TYPE]
            # Parse once and reuse the result for both validation and construction.
            payment_type = parse_payment_type(raw_payment_type)
            if payment_type is None:
                # Warning rather than info: the category is dropped from the result,
                # so a typo in the payment type must be visible to the user.
                self.__logger.warning("Skipping category %r, invalid payment type: %r", name, raw_payment_type)
                continue
            if name in result:
                raise CategoryNameDuplicate(name)
            matching_regexes = category[CATEGORIES_KEY_NAME_CATEGORY_REGEXES]
            result[name] = Category(name, payment_type, matching_regexes)
        return list(result.values())
14 changes: 14 additions & 0 deletions src/banker/parser/payment_type_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from banker.data.category import PaymentType


def parse_payment_type(payment_type: str) -> PaymentType | None:
    """Translate a payment type name into its ``PaymentType`` member.

    Recognized names are 'household', 'recurring', 'occasional' and 'optional';
    any other value yields ``None``.
    """
    if payment_type == 'household':
        return PaymentType.Household
    if payment_type == 'recurring':
        return PaymentType.Recurring
    if payment_type == 'occasional':
        return PaymentType.Occasional
    if payment_type == 'optional':
        return PaymentType.Optional
    return None
Empty file.
152 changes: 152 additions & 0 deletions src/banker/resources/categories.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
{
"version": "1.0.0",
"categories": [
{
"name": "Kaufland",
"payment_type": "household",
"matching_regexes": [
"KAUFLAND PL"
]
},
{
"name": "Bilety PKP",
"payment_type": "occasional",
"matching_regexes": [
"intercity\\.pl"
]
},
{
"name": "Leclerc",
"payment_type": "household",
"matching_regexes": [
"eLeclerc"
]
},
{
"name": "Biedronka",
"payment_type": "household",
"matching_regexes": [
"BIEDRONKA"
]
},
{
"name": "Obuwie",
"payment_type": "occasional",
"matching_regexes": [
"eobuwie\\.com\\.pl"
]
},
{
"name": "Netto",
"payment_type": "household",
"matching_regexes": [
"NETTO"
]
},
{
"name": "Paliwo",
"payment_type": "household",
"matching_regexes": [
"ORLEN"
]
},
{
"name": "Darowizny",
"payment_type": "optional",
"matching_regexes": [
"DAROWIZNA"
]
},
{
"name": "Carrefour",
"payment_type": "household",
"matching_regexes": [
"CARREFOUR"
]
},
{
"name": "Piekarnie",
"payment_type": "household",
"matching_regexes": [
"(?i)piekarnia"
]
},
{
"name": "Drogerie",
"payment_type": "household",
"matching_regexes": [
"HEBE"
]
},
{
"name": "Pralnie",
"payment_type": "occasional",
"matching_regexes": [
"PRALNIA"
]
},
{
"name": "Bilety MPK Wrocław",
"payment_type": "household",
"matching_regexes": [
"URBANCARD"
]
},
{
"name": "Spotify",
"payment_type": "recurring",
"matching_regexes": [
"Spotify"
]
},
{
"name": "Action",
"payment_type": "household",
"matching_regexes": [
"Action"
]
},
{
"name": "Lidl",
"payment_type": "household",
"matching_regexes": [
"LIDL"
]
},
{
"name": "RTV Euro AGD",
"payment_type": "occasional",
"matching_regexes": [
"EURO\\-NET"
]
},
{
"name": "Abonament telefoniczny",
"payment_type": "recurring",
"matching_regexes": [
"24\\.play\\.pl"
]
},
{
"name": "Castorama",
"payment_type": "occasional",
"matching_regexes": [
"CASTORAMA"
]
},
{
"name": "McDonalds",
"payment_type": "optional",
"matching_regexes": [
"MCDONALDS"
]
},
{
"name": "Lody",
"payment_type": "optional",
"matching_regexes": [
"(?i)lodziarnia"
]
}
]
}
Loading

0 comments on commit 9aeec7d

Please sign in to comment.