From 2ed50d988d58687553b63f1b4d74436338498c77 Mon Sep 17 00:00:00 2001 From: WojtekMs <62173407+WojtekMs@users.noreply.github.com> Date: Fri, 1 Dec 2023 10:48:36 +0100 Subject: [PATCH] Add transaction analysis logic Given transactions and supported categories it is possible to analyze transactions. The result are both: - matched categories with summed transaction values - unmatched transactions that still need manual classification --- src/banker/__main__.py | 21 +- src/banker/analyzer/__init__.py | 0 src/banker/analyzer/analyze.py | 30 +++ src/banker/data/category.py | 20 +- src/banker/data/transaction.py | 11 +- src/banker/parser/html_transactions_parser.py | 7 +- tests/banker/analyzer/__init__.py | 0 tests/banker/analyzer/test_analyze.py | 199 ++++++++++++++++++ tests/banker/conftest.py | 11 +- tests/banker/data/__init__.py | 0 tests/banker/data/test_transaction.py | 137 ++++++++++++ 11 files changed, 425 insertions(+), 11 deletions(-) create mode 100644 src/banker/analyzer/__init__.py create mode 100644 src/banker/analyzer/analyze.py create mode 100644 tests/banker/analyzer/__init__.py create mode 100644 tests/banker/analyzer/test_analyze.py create mode 100644 tests/banker/data/__init__.py create mode 100644 tests/banker/data/test_transaction.py diff --git a/src/banker/__main__.py b/src/banker/__main__.py index dc195bb..06289a1 100644 --- a/src/banker/__main__.py +++ b/src/banker/__main__.py @@ -1,2 +1,21 @@ +from banker.analyzer.analyze import analyze_transactions +from banker.data.category import Category, PaymentType +import argparse + +from banker.parser.html_transactions_parser import HtmlTransactionsParser + + def main(): - print("Hello world from Banker!") + supported_categories = [ + Category(name="Kaufland", payment_type=PaymentType.Household, matching_regexes=[r"KAUFLAND PL"])] + transactions_parser = HtmlTransactionsParser() + + parser = argparse.ArgumentParser() + parser.add_argument("html_file") + args = parser.parse_args() + + with open(args.html_file, "rb") as file: + all_transactions = transactions_parser.parse_transactions(file.read().decode('utf-8')) + analyze_result = analyze_transactions(all_transactions, supported_categories) + print(analyze_result) + # TODO: format output diff --git a/src/banker/analyzer/__init__.py b/src/banker/analyzer/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/banker/analyzer/analyze.py b/src/banker/analyzer/analyze.py new file mode 100644 index 0000000..36f5098 --- /dev/null +++ b/src/banker/analyzer/analyze.py @@ -0,0 +1,30 @@ +from dataclasses import dataclass + +from banker.data.category import Category +from banker.data.transaction import Transaction +from logging import getLogger + +analyze_logger = getLogger("Analyze") + + +@dataclass(frozen=True) +class AnalyzeResult: + unmatched_transactions: list[Transaction] + matched_categories: list[Category] + + +def analyze_transactions(transactions: list[Transaction], supported_categories: list[Category]) -> AnalyzeResult: + unmatched_transactions = [] + matched_categories = {} + for transaction in transactions: + matching_categories = transaction.find_matching(supported_categories) + matching_categories_count = len(matching_categories) + if matching_categories_count == 1: + category_name = matching_categories[0].get_name() + matched_category = matched_categories.setdefault(category_name, matching_categories[0]) + matched_category.value += transaction.value + else: + analyze_logger.info(f"Transaction: {transaction} matched to {matching_categories_count} categories") + unmatched_transactions.append(transaction) + return AnalyzeResult(unmatched_transactions=unmatched_transactions, + matched_categories=list(matched_categories.values())) diff --git a/src/banker/data/category.py b/src/banker/data/category.py index 90a4bd1..94708cc 100644 --- a/src/banker/data/category.py +++ b/src/banker/data/category.py @@ -5,10 +5,10 @@ class PaymentType(Enum): - household = auto() - recurring = auto() - optional = auto() - occasional = auto() + Household = auto() + Recurring = auto() + Optional = auto() + Occasional = auto() class Category: @@ -18,6 +18,18 @@ def __init__(self, name: str, payment_type: PaymentType, matching_regexes: list[ self.__matching_regexes: list[re.Pattern] = [re.compile(pattern) for pattern in matching_regexes] self.value = Money(amount='0', currency=PLN) + def __eq__(self, other): + if type(other) is type(self): + return self.__dict__ == other.__dict__ + return False + + def __str__(self): + return f"Category(name={self.__name}, payment_type={self.__payment_type}, " \ + f"matching_regexes={self.__matching_regexes}, value={self.value})" + + def __repr__(self): + return self.__str__() + def get_name(self) -> str: return self.__name diff --git a/src/banker/data/transaction.py b/src/banker/data/transaction.py index 4bc49ac..48f2213 100644 --- a/src/banker/data/transaction.py +++ b/src/banker/data/transaction.py @@ -8,12 +8,17 @@ class Transaction: date: str value: Money + type: str description: str def __post_init__(self): if self.value.currency != PLN: raise ValueError("The only accepted transaction currency is PLN") - def count_matching(self, categories: list[Category]) -> int: - # TODO: implement - return 0 + def find_matching(self, categories: list[Category]) -> list[Category]: + result = [] + for category in categories: + if any([True for pattern in category.get_matching_regexes() if + pattern.search(self.description) is not None]): + result.append(category) + return result diff --git a/src/banker/parser/html_transactions_parser.py b/src/banker/parser/html_transactions_parser.py index 0d73b31..3dc82c5 100644 --- a/src/banker/parser/html_transactions_parser.py +++ b/src/banker/parser/html_transactions_parser.py @@ -39,6 +39,11 @@ def parse_transactions(self, content: str) -> list[Transaction]: if value is None: self.logger.warning(f"Value not found in transaction {row_id}") continue + transaction_type = transaction.get("Typ transakcji") + if transaction_type is None: + self.logger.warning(f"Transaction type not found in transaction {row_id}") + continue result.append( - Transaction(date=date, description=description, value=Money(amount=str(value), currency=PLN))) + Transaction(date=date, description=description, value=Money(amount=str(value), currency=PLN), + type=transaction_type)) return result diff --git a/tests/banker/analyzer/__init__.py b/tests/banker/analyzer/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/banker/analyzer/test_analyze.py b/tests/banker/analyzer/test_analyze.py new file mode 100644 index 0000000..5dc31c1 --- /dev/null +++ b/tests/banker/analyzer/test_analyze.py @@ -0,0 +1,199 @@ +from copy import deepcopy + +import pytest + +from moneyed import Money, PLN + +from banker.data.category import Category, PaymentType +from banker.data.transaction import Transaction +from banker.analyzer.analyze import analyze_transactions, AnalyzeResult + + +def make_category_with_value(category: Category, value: Money) -> Category: + category_copy = deepcopy(category) + category_copy.value = value + return category_copy + + +@pytest.mark.parametrize( + 'transactions, supported_categories, expected_result', + [ + ( + [ + Transaction(date="2023-01-01", value=Money(amount="-11.27", currency=PLN), description="New shoes", + type="Card") + ], + [ + Category(name="Shoes", payment_type=PaymentType.Optional, matching_regexes=[r"(?i)shoes"]) + ], + AnalyzeResult(unmatched_transactions=[], matched_categories=[ + make_category_with_value( + Category(name="Shoes", payment_type=PaymentType.Optional, matching_regexes=[r"(?i)shoes"]), + Money(amount="-11.27", currency=PLN)) + ]) + ), + ( + [ + Transaction(date="2023-01-01", value=Money(amount="-13.30", currency=PLN), + description="Amazing trekking shoes", type="Card"), + Transaction(date="2023-01-02", value=Money(amount="-16.70", currency=PLN), + description="Casual shoes", type="Card"), + Transaction(date="2023-01-03", value=Money(amount="-20.00", currency=PLN), + description="Dancing shoes", type="Card") + ], + [ + Category(name="Shoes", payment_type=PaymentType.Optional, matching_regexes=[r"(?i)shoes"]) + ], + AnalyzeResult(unmatched_transactions=[], matched_categories=[ + make_category_with_value( + Category(name="Shoes", payment_type=PaymentType.Optional, matching_regexes=[r"(?i)shoes"]), + Money(amount="-50.00", currency=PLN)) + ]) + ), + ( + [ + Transaction(date="2023-01-01", value=Money(amount="-15.00", currency=PLN), + description="Amazing trekking shoes", type="Card"), + Transaction(date="2023-01-02", value=Money(amount="-33.00", currency=PLN), + description="Cheap shirts", type="Card"), + Transaction(date="2023-01-03", value=Money(amount="-50.00", currency=PLN), + description="Expensive sweets", type="Card") + ], + [ + Category(name="Shoes", payment_type=PaymentType.Optional, matching_regexes=[r"(?i)shoes"]), + Category(name="Sweets", payment_type=PaymentType.Optional, matching_regexes=[r"(?i)sweets"]), + Category(name="Shirts", payment_type=PaymentType.Optional, matching_regexes=[r"(?i)shirts"]) + + ], + AnalyzeResult(unmatched_transactions=[], matched_categories=[ + make_category_with_value( + Category(name="Shoes", payment_type=PaymentType.Optional, matching_regexes=[r"(?i)shoes"]), + Money(amount="-15.00", currency=PLN)), + make_category_with_value( + Category(name="Shirts", payment_type=PaymentType.Optional, matching_regexes=[r"(?i)shirts"]), + Money(amount="-33.00", currency=PLN)), + make_category_with_value( + Category(name="Sweets", payment_type=PaymentType.Optional, matching_regexes=[r"(?i)sweets"]), + Money(amount="-50.00", currency=PLN)) + ]) + ), + ( + [ + Transaction(date="2023-01-01", value=Money(amount="-11.27", currency=PLN), description="New shoes", + type="Card"), + Transaction(date="2023-01-02", value=Money(amount="-500.00", currency=PLN), description="New game", + type="Card") + ], + [ + Category(name="Shoes", payment_type=PaymentType.Optional, matching_regexes=[r"(?i)shoes"]) + ], + + AnalyzeResult( + unmatched_transactions=[ + Transaction(date="2023-01-02", value=Money(amount="-500.00", currency=PLN), + description="New game", type="Card")], + matched_categories=[ + make_category_with_value( + Category(name="Shoes", payment_type=PaymentType.Optional, matching_regexes=[r"(?i)shoes"]), + Money(amount="-11.27", currency=PLN)) + ]) + ), + ( + [ + Transaction(date="2023-01-01", value=Money(amount="-11.27", currency=PLN), description="New shoes", + type="Card"), + ], + [ + Category(name="Shoes", payment_type=PaymentType.Optional, matching_regexes=[r"(?i)shoes"]), + Category(name="New stuff", payment_type=PaymentType.Optional, matching_regexes=[r"(?i)new"]) + ], + + AnalyzeResult( + unmatched_transactions=[Transaction(date="2023-01-01", value=Money(amount="-11.27", currency=PLN), + description="New shoes", type="Card")], + matched_categories=[]) + ), + ( + [ + Transaction(date="2023-01-01", value=Money(amount="-11.27", currency=PLN), description="New shoes", + type="Card"), + ], + [ + Category(name="Shoes", payment_type=PaymentType.Optional, matching_regexes=[r"(?i)shoes"]), + Category(name="Weirdo", payment_type=PaymentType.Optional, matching_regexes=[r"what?"]) + ], + + AnalyzeResult( + unmatched_transactions=[], + matched_categories=[ + make_category_with_value( + Category(name="Shoes", payment_type=PaymentType.Optional, matching_regexes=[r"(?i)shoes"]), + Money(amount="-11.27", currency=PLN)) + ]) + ), + ( + [ + Transaction(date="2023-01-01", value=Money(amount="-11.27", currency=PLN), description="New shoes", + type="Card"), + ], + [ + Category(name="Weirdo", payment_type=PaymentType.Optional, matching_regexes=[r"what?"]) + ], + + AnalyzeResult( + unmatched_transactions=[Transaction(date="2023-01-01", value=Money(amount="-11.27", currency=PLN), + description="New shoes", type="Card")], + matched_categories=[]) + ), + ( + [], + [ + Category(name="Shoes", payment_type=PaymentType.Optional, matching_regexes=[r"(?i)shoes"]), + ], + + AnalyzeResult( + unmatched_transactions=[], + matched_categories=[]) + ), + ( + [ + Transaction(date="2023-01-01", value=Money(amount="-11.27", currency=PLN), description="New shoes", + type="Card"), + Transaction(date="2023-01-02", value=Money(amount="-500.00", currency=PLN), description="New game", + type="Card") + ], + [], + + AnalyzeResult( + unmatched_transactions=[Transaction(date="2023-01-01", value=Money(amount="-11.27", currency=PLN), + description="New shoes", type="Card"), + Transaction(date="2023-01-02", value=Money(amount="-500.00", currency=PLN), + description="New game", type="Card")], + matched_categories=[]) + ), + ( + [], + [], + + AnalyzeResult( + unmatched_transactions=[], + matched_categories=[]) + ), + ], + ids=["given matching transaction to one category then return matched category with increased value", + "given many matching transactions to one category then return matched category with increased value", + "given many transactions and many categories then return all matched categories with increased values", + "given transaction that does not match then return unmatched transaction", + "given matching transaction to many categories then return unmatched transaction", + "given category that was not matched then exclude it from matched categories", + "given one transaction that does not match then return unmatched transaction and no categories", + "given no transactions then return empty list of both transactions and categories", + "given transactions and empty list of categories then return all transactions as unmatched", + "given empty transactions and empty categories then return empty lists"] +) +def test_given_transactions_and_supported_categories_when_analyze_then_return_result( + transactions, supported_categories, expected_result +): + actual_result = analyze_transactions(transactions, supported_categories) + + assert actual_result == expected_result diff --git a/tests/banker/conftest.py b/tests/banker/conftest.py index 337223d..15e893a 100644 --- a/tests/banker/conftest.py +++ b/tests/banker/conftest.py @@ -13,6 +13,7 @@ def transaction1(): "Data i czas operacji : 2023-10-30 " "Oryginalna kwota operacji : 37.35 " "Numer karty : 516931******3943", + type="Płatność kartą", value=Money(amount='-37.35', currency=PLN)) @@ -26,6 +27,7 @@ def transaction2(): "Data i czas operacji : 2023-10-30 " "Oryginalna kwota operacji : 200.00 " "Numer karty : 516931******3943", + type="Wypłata z bankomatu", value=Money(amount='-200.00', currency=PLN)) @@ -37,6 +39,7 @@ def transaction3(): "Adres : intercity.pl " "'Operacja : 00000076965444780 " "Numer referencyjny : 00000076965444780", + type="Płatność web - kod mobilny", value=Money(amount='-49.02', currency=PLN)) @@ -46,11 +49,15 @@ def transaction4(): "Adres nadawcy : " "UL.GULASZOWA 0 " "00-001 WROCŁAW POL " - "Tytuł : WPŁATA", value=Money(amount='800.00', currency=PLN)) + "Tytuł : WPŁATA", + type="Wpłata gotówkowa w kasie", + value=Money(amount='800.00', currency=PLN)) @pytest.fixture def transaction5(): return Transaction(date="2023-10-08", description="Rachunek odbiorcy : 000000000000000000000 " "Nazwa odbiorcy : Alicja " - "Tytuł : Na korki", value=Money(amount='-50.00', currency=PLN)) + "Tytuł : Na korki", + type="Zlecenie stałe", + value=Money(amount='-50.00', currency=PLN)) diff --git a/tests/banker/data/__init__.py b/tests/banker/data/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/banker/data/test_transaction.py b/tests/banker/data/test_transaction.py new file mode 100644 index 0000000..20b866a --- /dev/null +++ b/tests/banker/data/test_transaction.py @@ -0,0 +1,137 @@ +import pytest + +from moneyed import Money, PLN + +from banker.data.category import Category, PaymentType +from banker.data.transaction import Transaction + + +@pytest.fixture +def transaction_sut(): + return Transaction(date="2023-11-01", value=Money(amount='-37.35', currency=PLN), + type="Card", + description="000015792 05272423303314705681107 " + "Lokalizacja : " + "Adres : KAUFLAND PL 6663 " + "Miasto : Gliwice " + "Kraj : POLSKA " + "Data i czas operacji : 2023-10-30 " + "Oryginalna kwota operacji : 37.35 " + "Numer karty : 516931******3943") + + +@pytest.mark.parametrize( + 'categories, expected_result', + [ + ( + [ + Category(name="Kaufland", payment_type=PaymentType.Household, matching_regexes=[r"KAUFLAND PL"]) + ], + [ + Category(name="Kaufland", payment_type=PaymentType.Household, matching_regexes=[r"KAUFLAND PL"]) + ] + ), + ( + [ + Category(name="Kaufland", payment_type=PaymentType.Household, matching_regexes=[r"KAUFLAND PL"]), + Category(name="Gliwice", payment_type=PaymentType.Optional, matching_regexes=[r"(?i)gliwice"]), + Category(name="Polska", payment_type=PaymentType.Occasional, matching_regexes=[r"P.L.KA"]) + ], + [ + Category(name="Kaufland", payment_type=PaymentType.Household, matching_regexes=[r"KAUFLAND PL"]), + Category(name="Gliwice", payment_type=PaymentType.Optional, matching_regexes=[r"(?i)gliwice"]), + Category(name="Polska", payment_type=PaymentType.Occasional, matching_regexes=[r"P.L.KA"]) + ], + ), + ( + [ + Category(name="Kaufland", payment_type=PaymentType.Household, matching_regexes=[r"KAUFLAND PL"]), + Category(name="Bad category", payment_type=PaymentType.Optional, matching_regexes=[r"donald duck"]), + Category(name="Polska", payment_type=PaymentType.Occasional, matching_regexes=[r"P.L.KA"]) + ], + [ + Category(name="Kaufland", payment_type=PaymentType.Household, matching_regexes=[r"KAUFLAND PL"]), + Category(name="Polska", payment_type=PaymentType.Occasional, matching_regexes=[r"P.L.KA"]) + ], + ), + ( + [ + Category(name="Bad category 0", payment_type=PaymentType.Household, matching_regexes=[r"rick"]), + Category(name="Bad category 1", payment_type=PaymentType.Optional, matching_regexes=[r"jack"]), + Category(name="Bad category 2", payment_type=PaymentType.Occasional, matching_regexes=[r"sack"]) + ], + [] + ), + ( + [], + [] + ), + ( + [ + Category(name="Kaufland", payment_type=PaymentType.Household, matching_regexes=["KAUF"]), + Category(name="Gliwice", payment_type=PaymentType.Optional, matching_regexes=["(?i)gliwice"]), + Category(name="Polska", payment_type=PaymentType.Occasional, matching_regexes=["P.L.KA"]) + ], + [ + Category(name="Kaufland", payment_type=PaymentType.Household, matching_regexes=["KAUF"]), + Category(name="Gliwice", payment_type=PaymentType.Optional, matching_regexes=["(?i)gliwice"]), + Category(name="Polska", payment_type=PaymentType.Occasional, matching_regexes=["P.L.KA"]) + ], + ), + ( + [ + Category(name="Kaufland", payment_type=PaymentType.Household, + matching_regexes=["KAUF", "KAUFLAND PL", "KAU.LAND"]), + ], + [ + Category(name="Kaufland", payment_type=PaymentType.Household, + matching_regexes=["KAUF", "KAUFLAND PL", "KAU.LAND"]), + ], + ), + ( + [ + Category(name="Kaufland", payment_type=PaymentType.Household, + matching_regexes=["KAUF", "KAUFLAND PL", "KAU.LAND"]), + Category(name="Polska", payment_type=PaymentType.Household, + matching_regexes=["POL", "POLSKA", "PO.S"]), + ], + [ + Category(name="Kaufland", payment_type=PaymentType.Household, + matching_regexes=["KAUF", "KAUFLAND PL", "KAU.LAND"]), + Category(name="Polska", payment_type=PaymentType.Household, + matching_regexes=["POL", "POLSKA", "PO.S"]), + ], + ), + ( + [ + Category(name="Empty patterns", payment_type=PaymentType.Household, matching_regexes=[]), + ], + [] + ), + ( + [ + Category(name="Longer pattern", payment_type=PaymentType.Household, + matching_regexes=["2023-10-30 Oryginalna kwota operacji"]), + ], + [ + Category(name="Longer pattern", payment_type=PaymentType.Household, + matching_regexes=["2023-10-30 Oryginalna kwota operacji"]), + ], + ), + ], + ids=["one category is matching", + "all categories are matching", + "some categories are matching", + "none categories are matching", + "empty categories list", + "regexes in regular string", + "multiple matching regexes in one category", + "multiple matching regexes in two categories", + "one category with empty matching regexes list", + "one category with longer regex"] +) +def test_given_categories_when_find_matching_then_return_list_of_matched_categories(transaction_sut, categories, + expected_result): + actual_result = transaction_sut.find_matching(categories) + + assert actual_result == expected_result