Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add parsing categories from file #11

Merged
merged 1 commit into from
Dec 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ __pycache__
.idea
unmatched_transactions.html
*.xlsx
output
4 changes: 3 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,9 @@ dependencies = [
"html5lib",
"beautifulsoup4",
"py-moneyed",
"openpyxl"
"openpyxl",
"semver",
"importlib-resources"
]

[project.optional-dependencies] # Optional
Expand Down
54 changes: 41 additions & 13 deletions src/banker/__main__.py
Original file line number Diff line number Diff line change
@@ -1,30 +1,58 @@
from banker.analyzer.analyze import analyze_transactions, deduce_month_year
from banker.data.category import Category, PaymentType
import argparse
import os.path

from importlib_resources import files

from banker.analyzer.analyze import analyze_transactions, deduce_month_year
from banker.data.category import Category

from banker.data.transaction import Transaction
from banker.formatter.month_year_formatter import format_month_year
from banker.parser.html_transactions_parser import HtmlTransactionsParser
from banker.formatter.html_transactions_formatter import HtmlTransactionsFormatter
from banker.parser.interfaces.categories_parser import ICategoriesParser
from banker.parser.interfaces.transactions_parser import ITransactionsParser
from banker.parser.json_categories_parser import JsonCategoriesParser
from banker.writer.excel_categories_writer import ExcelCategoriesWriter


def get_supported_categories(categories_parser: ICategoriesParser, categories_filepath: str) -> list[Category]:
    """Read the categories file and hand its text to ``categories_parser``.

    :param categories_parser: parser whose ``parse_categories`` receives the file text.
    :param categories_filepath: location of the categories file.
    :return: whatever ``categories_parser.parse_categories`` returns for the file content.
    :raises OSError: if the file cannot be opened.
    :raises UnicodeDecodeError: if the file is not valid UTF-8.
    """
    # Decode explicitly as UTF-8: the bundled categories contain non-ASCII
    # names, and the platform default encoding is not UTF-8 everywhere.
    with open(categories_filepath, "r", encoding="utf-8") as file:
        return categories_parser.parse_categories(file.read())


def get_transactions(transactions_parser: ITransactionsParser, transactions_filepath: str) -> list[Transaction]:
    """Read the transactions file and hand its text to ``transactions_parser``.

    :param transactions_parser: parser whose ``parse_transactions`` receives the file text.
    :param transactions_filepath: location of the transactions file.
    :return: whatever ``transactions_parser.parse_transactions`` returns for the file content.
    :raises OSError: if the file cannot be opened.
    :raises UnicodeDecodeError: if the file is not valid UTF-8.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default text encoding.
    with open(transactions_filepath, "r", encoding="utf-8") as transactions_file:
        return transactions_parser.parse_transactions(transactions_file.read())


def save_to_file(filepath: str, content: str):
    """Write ``content`` to ``filepath`` as UTF-8 text, replacing any existing file.

    :param filepath: destination path; its parent directory must already exist.
    :param content: text to store.
    :raises OSError: if the file cannot be opened for writing.
    """
    # Encode explicitly as UTF-8: with the platform default encoding,
    # non-ASCII text can fail to encode or be stored in a locale-specific form.
    with open(filepath, "w", encoding="utf-8") as file:
        file.write(content)


def main():
    """Run the command-line pipeline: parse, categorise and export transactions.

    Command-line arguments:
      * ``html_file`` - file with the transactions to analyse.
      * ``--categories_file`` - categories definition; defaults to the
        ``categories.json`` resource of the ``banker.resources`` package.
      * ``--output_directory`` - where results are written; defaults to an
        ``output`` entry under the ``banker.resources`` package.

    Two files are produced in the output directory:
    ``unmatched_transactions.html`` with the formatted unmatched transactions
    and ``autogen_budget.xlsx`` with the matched categories.
    """
    transactions_parser = HtmlTransactionsParser()
    categories_parser = JsonCategoriesParser()
    transactions_formatter = HtmlTransactionsFormatter()
    categories_writer = ExcelCategoriesWriter()

    parser = argparse.ArgumentParser()
    parser.add_argument("html_file")
    parser.add_argument("--categories_file", default=files('banker.resources').joinpath('categories.json'))
    parser.add_argument("--output_directory", default=files('banker.resources').joinpath('output'))
    args = parser.parse_args()

    # Create the destination first so both writers below can rely on it.
    os.makedirs(args.output_directory, exist_ok=True)
    output_unmatched_transactions_filepath = os.path.join(args.output_directory, "unmatched_transactions.html")
    output_matched_categories_filepath = os.path.join(args.output_directory, "autogen_budget.xlsx")

    all_transactions = get_transactions(transactions_parser, args.html_file)
    month_year = deduce_month_year(all_transactions)
    supported_categories = get_supported_categories(categories_parser, args.categories_file)
    analyze_result = analyze_transactions(all_transactions, supported_categories)
    formatted_transactions = transactions_formatter.format_transactions(analyze_result.unmatched_transactions)

    save_to_file(output_unmatched_transactions_filepath, formatted_transactions)
    categories_writer.write_categories(analyze_result.matched_categories, output_matched_categories_filepath,
                                       format_month_year(month_year))
1 change: 0 additions & 1 deletion src/banker/analyzer/analyze.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import datetime
from dataclasses import dataclass

from moneyed import Money, PLN
Expand Down
5 changes: 5 additions & 0 deletions src/banker/common/naming.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,8 @@
TRANSACTION_COL_NAME_DESCRIPTION = "Opis"
TRANSACTION_COL_NAME_VALUE = "Kwota"

# Top-level keys of the categories JSON document.
CATEGORIES_KEY_NAME_VERSION = "version"
CATEGORIES_KEY_NAME_CATEGORIES = "categories"
# Keys of a single category object inside the "categories" list.
CATEGORIES_KEY_NAME_CATEGORY_NAME = "name"
CATEGORIES_KEY_NAME_CATEGORY_PAYMENT_TYPE = "payment_type"
CATEGORIES_KEY_NAME_CATEGORY_REGEXES = "matching_regexes"
9 changes: 9 additions & 0 deletions src/banker/parser/interfaces/categories_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from abc import ABC, abstractmethod

from banker.data.category import Category


class ICategoriesParser(ABC):
    """Interface for objects that turn raw text into ``Category`` objects."""

    @abstractmethod
    def parse_categories(self, content: str) -> list[Category]:
        """Parse ``content`` and return the categories it describes.

        Concrete parsers must override this method; the base implementation
        always raises ``NotImplementedError``.
        """
        message = "Method not implemented in subclass"
        raise NotImplementedError(message)
90 changes: 90 additions & 0 deletions src/banker/parser/json_categories_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
import logging
import semver
import json

from banker.common.naming import CATEGORIES_KEY_NAME_VERSION, CATEGORIES_KEY_NAME_CATEGORIES, \
CATEGORIES_KEY_NAME_CATEGORY_NAME, CATEGORIES_KEY_NAME_CATEGORY_PAYMENT_TYPE, CATEGORIES_KEY_NAME_CATEGORY_REGEXES
from banker.data.category import Category
from banker.parser.interfaces.categories_parser import ICategoriesParser
from banker.parser.payment_type_parser import parse_payment_type


class CategoriesVersionMissing(Exception):
    """Raised when the categories document has no version key."""

    def __str__(self):
        missing_key = CATEGORIES_KEY_NAME_VERSION
        return f"Key {missing_key} is missing in categories JSON file"


class CategoriesVersionInvalid(Exception):
    """Raised when the categories version is not a semantic version string."""

    def __init__(self, version: str):
        # Keep the offending value so the message can show it.
        self.__rejected = version

    def __str__(self):
        problem = "Categories version has invalid format, "
        expectation = f"expected semantic versioning e.g: 1.0.0, actual: {self.__rejected}"
        return problem + expectation


class CategoriesVersionUnsupported(Exception):
    """Raised when the categories version is rejected by the application."""

    def __init__(self, supported_version: semver.Version, current_version: semver.Version):
        # Both versions are kept only to build the message.
        self.__expected = supported_version
        self.__found = current_version

    def __str__(self):
        problem = "Categories version is unsupported by application, "
        versions = f"supported version: {self.__expected}, current version: {self.__found}"
        return problem + versions


class CategoryNameDuplicate(Exception):
    """Raised when the same category name appears more than once."""

    def __init__(self, name: str):
        # Keep the repeated name so the message can show it.
        self.__repeated = name

    def __str__(self):
        repeated = self.__repeated
        return f"Categories names must be unique, but this category name is used multiple times: {repeated}"


class JsonCategoriesParser(ICategoriesParser):
    """Build ``Category`` objects from the text of a categories JSON document.

    The document must be a JSON object holding a semantic-version string under
    ``CATEGORIES_KEY_NAME_VERSION`` and, optionally, a list of category
    objects under ``CATEGORIES_KEY_NAME_CATEGORIES``.  Malformed category
    entries are skipped with a warning; a repeated category name aborts
    parsing.
    """

    def __init__(self):
        self.__supported_version = semver.Version(major=1, minor=0, patch=0)
        self.__logger = logging.getLogger("JsonCategoriesParser")

    def __validate_version(self, json_dict: dict):
        """Check the document version against the supported one.

        :raises CategoriesVersionMissing: the version key is absent or null.
        :raises CategoriesVersionInvalid: the value is not a semantic version string.
        :raises CategoriesVersionUnsupported: the supported version's
            ``is_compatible`` check rejects the document version.
        """
        version = json_dict.get(CATEGORIES_KEY_NAME_VERSION)
        if version is None:
            raise CategoriesVersionMissing()
        # Guard the type first: semver validates text only, so a JSON number
        # such as 1 must be reported as an invalid version here rather than
        # surfacing as an unrelated error from the library.
        if not isinstance(version, str) or not semver.Version.is_valid(version):
            raise CategoriesVersionInvalid(version)
        version = semver.Version.parse(version)
        if not self.__supported_version.is_compatible(version):
            raise CategoriesVersionUnsupported(self.__supported_version, version)

    def __contains_required_keys(self, category: dict) -> bool:
        """Return True when ``category`` is an object holding every required key."""
        if not isinstance(category, dict):
            # `in` on a list or string would test membership/substring instead of keys.
            self.__logger.warning("Category entry is not a JSON object: %r", category)
            return False
        required_keys = [CATEGORIES_KEY_NAME_CATEGORY_NAME, CATEGORIES_KEY_NAME_CATEGORY_PAYMENT_TYPE,
                         CATEGORIES_KEY_NAME_CATEGORY_REGEXES]
        for required_key in required_keys:
            if required_key not in category:
                # WARNING rather than INFO: the category is dropped, and INFO
                # records are hidden by the default logging configuration.
                self.__logger.warning("Category object key missing: %s", required_key)
                return False
        return True

    def __valid_payment_type(self, category: dict) -> bool:
        """Return True when the category's payment type is one ``parse_payment_type`` recognises."""
        raw_payment_type = category[CATEGORIES_KEY_NAME_CATEGORY_PAYMENT_TYPE]
        if parse_payment_type(raw_payment_type) is None:
            # Include the rejected value so misspelt payment types are easy to spot.
            self.__logger.warning("Invalid payment type: %r", raw_payment_type)
            return False
        return True

    def parse_categories(self, content: str) -> list[Category]:
        """Parse ``content`` and return the valid categories in document order.

        :param content: text of the categories JSON document.
        :return: one ``Category`` per valid entry; invalid entries are skipped.
        :raises json.JSONDecodeError: ``content`` is not valid JSON.
        :raises CategoriesVersionMissing: the root is not an object or has no version.
        :raises CategoriesVersionInvalid: the version is not a semantic version string.
        :raises CategoriesVersionUnsupported: the version is not supported.
        :raises CategoryNameDuplicate: two valid entries share a name.
        """
        json_dict = json.loads(content)
        if not isinstance(json_dict, dict):
            # A non-object root (list, string, number) cannot carry a version key.
            raise CategoriesVersionMissing()
        self.__validate_version(json_dict)

        # Keyed by name to detect duplicates; dicts keep insertion order.
        result = {}
        for category in json_dict.get(CATEGORIES_KEY_NAME_CATEGORIES, []):
            if not self.__contains_required_keys(category):
                continue
            if not self.__valid_payment_type(category):
                continue
            name = category[CATEGORIES_KEY_NAME_CATEGORY_NAME]
            if name in result:
                raise CategoryNameDuplicate(name)
            payment_type = parse_payment_type(category[CATEGORIES_KEY_NAME_CATEGORY_PAYMENT_TYPE])
            matching_regexes = category[CATEGORIES_KEY_NAME_CATEGORY_REGEXES]
            result[name] = Category(name, payment_type, matching_regexes)
        return list(result.values())
14 changes: 14 additions & 0 deletions src/banker/parser/payment_type_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from banker.data.category import PaymentType


def parse_payment_type(payment_type: str) -> PaymentType | None:
    """Translate a lowercase payment-type label into its ``PaymentType`` member.

    Recognised labels are ``'household'``, ``'recurring'``, ``'occasional'``
    and ``'optional'``; the comparison is exact.

    :param payment_type: label to translate.
    :return: the matching ``PaymentType`` member, or ``None`` for any other value.
    """
    if payment_type == 'household':
        return PaymentType.Household
    if payment_type == 'recurring':
        return PaymentType.Recurring
    if payment_type == 'occasional':
        return PaymentType.Occasional
    if payment_type == 'optional':
        return PaymentType.Optional
    return None
Empty file.
152 changes: 152 additions & 0 deletions src/banker/resources/categories.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
{
"version": "1.0.0",
"categories": [
{
"name": "Kaufland",
"payment_type": "household",
"matching_regexes": [
"KAUFLAND PL"
]
},
{
"name": "Bilety PKP",
"payment_type": "occasional",
"matching_regexes": [
"intercity\\.pl"
]
},
{
"name": "Leclerc",
"payment_type": "household",
"matching_regexes": [
"eLeclerc"
]
},
{
"name": "Biedronka",
"payment_type": "household",
"matching_regexes": [
"BIEDRONKA"
]
},
{
"name": "Obuwie",
"payment_type": "occasional",
"matching_regexes": [
"eobuwie\\.com\\.pl"
]
},
{
"name": "Netto",
"payment_type": "household",
"matching_regexes": [
"NETTO"
]
},
{
"name": "Paliwo",
"payment_type": "household",
"matching_regexes": [
"ORLEN"
]
},
{
"name": "Darowizny",
"payment_type": "optional",
"matching_regexes": [
"DAROWIZNA"
]
},
{
"name": "Carrefour",
"payment_type": "household",
"matching_regexes": [
"CARREFOUR"
]
},
{
"name": "Piekarnie",
"payment_type": "household",
"matching_regexes": [
"(?i)piekarnia"
]
},
{
"name": "Drogerie",
"payment_type": "household",
"matching_regexes": [
"HEBE"
]
},
{
"name": "Pralnie",
"payment_type": "occasional",
"matching_regexes": [
"PRALNIA"
]
},
{
"name": "Bilety MPK Wrocław",
"payment_type": "household",
"matching_regexes": [
"URBANCARD"
]
},
{
"name": "Spotify",
"payment_type": "recurring",
"matching_regexes": [
"Spotify"
]
},
{
"name": "Action",
"payment_type": "household",
"matching_regexes": [
"Action"
]
},
{
"name": "Lidl",
"payment_type": "household",
"matching_regexes": [
"LIDL"
]
},
{
"name": "RTV Euro AGD",
"payment_type": "occasional",
"matching_regexes": [
"EURO\\-NET"
]
},
{
"name": "Abonament telefoniczny",
"payment_type": "recurring",
"matching_regexes": [
"24\\.play\\.pl"
]
},
{
"name": "Castorama",
"payment_type": "occasional",
"matching_regexes": [
"CASTORAMA"
]
},
{
"name": "McDonalds",
"payment_type": "optional",
"matching_regexes": [
"MCDONALDS"
]
},
{
"name": "Lody",
"payment_type": "optional",
"matching_regexes": [
"(?i)lodziarnia"
]
}
]
}
Loading
Loading