From 5dfdff3da826dd9f5f6799c39f19bafb71f308f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Witkowski?= Date: Sat, 3 Aug 2024 16:34:45 +0200 Subject: [PATCH] Backend: more consistent api urls, better ordering of spider pipelines, integration tests for pipelines and db (#19) Introduced TestContainers --- azure/setup_azure_infra.sh | 3 +- backend/api/flask_app.py | 14 +- backend/db.py | 29 ++-- backend/pipelines.py | 61 +++++---- backend/run_tests.sh | 5 +- backend/settings.py | 6 +- backend/spiders/FlugzeugMarktDeSpider.py | 6 +- backend/spiders/PlaneCheckComSpider.py | 6 +- backend/spiders/SoaringDeSpider.py | 6 +- backend/tests/requirements.txt | 5 +- backend/tests/test_FlugzeugMarktDeSpider.py | 2 +- backend/tests/test_PlaneCheckComSpider.py | 7 +- backend/tests/test_SoaringDeSpider.py | 5 +- backend/tests/test__testcontainers_setup.py | 17 +++ backend/tests/test_db.py | 53 ++++++++ backend/tests/test_pipelines.py | 124 +++++++++++++++++- .../V1.1__aero_offers_created_idx.sql | 1 - db/migrations/V2__aero_offers_date_idx.sql | 1 + db/migrations/disclaimer.txt | 1 + ui/.env.development | 2 +- ui/.env.production | 2 +- ui/src/components/AircraftModelFilter.vue | 2 +- ui/src/views/OfferDetails.vue | 2 +- ui/src/views/OffersList.vue | 2 +- 24 files changed, 272 insertions(+), 90 deletions(-) create mode 100644 backend/tests/test__testcontainers_setup.py create mode 100644 backend/tests/test_db.py delete mode 100644 db/migrations/V1.1__aero_offers_created_idx.sql create mode 100644 db/migrations/V2__aero_offers_date_idx.sql create mode 100644 db/migrations/disclaimer.txt diff --git a/azure/setup_azure_infra.sh b/azure/setup_azure_infra.sh index b6ea925..c43d02e 100644 --- a/azure/setup_azure_infra.sh +++ b/azure/setup_azure_infra.sh @@ -34,7 +34,8 @@ az containerapp create \ --environment $ENV_NAME \ --registry-server $ACR \ --image $ACR/aerooffers-api:$DOCKER_IMAGE_TAG \ - --env-vars DB_HOST="???" DB_PORT="5432" DB_NAME="???" DB_USER="???" DB_PW="???" \ + --secrets "db-user=$DB_USER" "db-password=$DB_PASS" \ + --env-vars "DB_HOST=$DB_HOST" "DB_PORT=$DB_PORT" "DB_NAME=$DB_NAME" "DB_USER=secretref:db-user" "DB_PW=secretref:db-password" \ --target-port 80 \ --ingress internal \ --transport tcp \ diff --git a/backend/api/flask_app.py b/backend/api/flask_app.py index 60507a4..360e11d 100644 --- a/backend/api/flask_app.py +++ b/backend/api/flask_app.py @@ -7,13 +7,18 @@ app = Flask(__name__) CORS(app, resources={r"/*": {"origins": "*"}}) +@app.route("/api/models") +@headers({'Cache-Control':'public, max-age=360'}) +def aircraft_models(): + return jsonify(classifier.get_all_models()) + @app.route('/api/offers') def offers(): return jsonify(db.get_offers_dict(aircraft_type=request.args.get('aircraft_type'), offset=request.args.get('offset'), limit=request.args.get('limit'))) -@app.route("/api/model//") +@app.route("/api/offers//") def model_information(manufacturer, model): """ Returns statistics for a specific manufacturer and model @@ -26,10 +31,5 @@ def model_information(manufacturer, model): manufacturer_info["offers"] = db.get_offers_for_model(manufacturer, model) return jsonify(manufacturer_info) -@app.route("/api/models") -@headers({'Cache-Control':'public, max-age=360'}) -def aircraft_models(): - return jsonify(classifier.get_all_models()) - if __name__ == '__main__': - app.run(host='127.0.0.1', port=8080, debug=True) + app.run(host='127.0.0.1', port=8080, debug=False) diff --git a/backend/db.py b/backend/db.py index daf516e..c99ad67 100644 --- a/backend/db.py +++ b/backend/db.py @@ -3,17 +3,16 @@ from sqlalchemy import * from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.orm import sessionmaker -from sqlalchemy.sql import select +from sqlalchemy.sql import text from sqlalchemy.types import Date, DateTime, Unicode, Numeric, Integer from my_logging import * -from settings import DB_NAME, DB_USER, DB_PW, DB_HOST, DB_PORT +from settings import DB_HOST, DB_PORT, DB_NAME, DB_USER, DB_PW logger = logging.getLogger('db') Base = declarative_base() - class AircraftOffer(Base): __tablename__ = "aircraft_offer" @@ -50,8 +49,7 @@ def as_dict(self): "location": self.location, "aircraft_type": self.aircraft_type, "manufacturer": self.manufacturer, - "model": self.model, - "classified": self.classified + "model": self.model } class ExchangeRate(Base): @@ -66,10 +64,11 @@ class ExchangeRate(Base): engine = create_engine('postgresql+psycopg2://{0}:{1}@{2}:{3}/{4}'.format(DB_USER, DB_PW, DB_HOST, DB_PORT, DB_NAME)) Session = sessionmaker(bind=engine) - -def create_tables(): - Base.metadata.create_all(engine) - +def truncate_offers(): + session = Session() + session.execute(text("TRUNCATE aircraft_offer")) + session.commit() + session.close() def store_entity(entity): session = Session() @@ -91,16 +90,18 @@ def update_exchange_rate(exchange_rate): session.commit() -def has_offer_url(offer_url): +def offer_url_exists(offer_url): + session = Session() try: - query = select(select(AircraftOffer.offer_url).where(AircraftOffer.offer_url == offer_url).exists()) - conn = engine.connect() - result = conn.execute(query).fetchone() - return result is not None and result[0] == True + q = session.query(AircraftOffer).where(AircraftOffer.offer_url == offer_url).exists() + result = session.query(q).one() + return result is not None and result[0] except Exception as e: logger.error(e) logger.error("database error, assuming we don't have this offer already") return False + finally: + session.close() def get_exchange_rates_as_dict(session): all_exchange_rates = session.query(ExchangeRate).all() diff --git a/backend/pipelines.py b/backend/pipelines.py index 436d376..45ee9dc 100644 --- a/backend/pipelines.py +++ b/backend/pipelines.py @@ -1,39 +1,42 @@ import datetime - from scrapy.exceptions import DropItem +from price_parser import Price import db from my_logging import * -from spiders import SoaringDeSpider, FlugzeugMarktDeSpider, PlaneCheckComSpider from exchange_rates import get_currency_code logger = logging.getLogger('pipeline') - class DuplicateDetection(object): - def process_item(self, item, spider): - logger.debug("Detecting duplicates for item %s", str(item)) - if spider.name in [SoaringDeSpider.SoaringDeSpider.name, - FlugzeugMarktDeSpider.FlugzeugMarktDeSpider.name, - PlaneCheckComSpider.PlaneCheckComSpider.name]: - has_offer = db.has_offer_url(item["offer_url"]) - if has_offer: - logger.debug("Offer URL matches, Offer is already stored. dropping item") - raise DropItem("Offer already stored") - elif item['price'].amount is None: - raise DropItem("Offer has no price") - else: - logger.warning("Can't handle this spider for duplicate detection: %s", spider.name) + def process_item(self, item, _): + if db.offer_url_exists(item["offer_url"]): + logger.debug("Offer already exists in DB, url={0}".format(item["offer_url"])) + raise DropItem("Offer already exists in DB, url={0}".format(item["offer_url"])) return item -class FilterUnreasonablePrices(object): +class PriceParser(object): def process_item(self, item, _): - logger.debug("Filtering Prices of 1 and below") - if item["price"] and item["price"].amount <= 1: - raise DropItem("Offer has price of 1 (or below)") + price = Price.fromstring(item['raw_price']) + if price is None or price.amount is None: + msg = "Offer has no valid price, raw_price='{0}' url={1}".format(item['raw_price'].strip(), item["offer_url"]) + logger.info(msg) + raise DropItem(msg) + + if price.amount <= 1: + msg = "Offer has unreasonable price smaller than 1, price={0}, url={1}".format(price.amount_text, item["offer_url"]) + logger.info(msg) + raise DropItem(msg) + + if price.amount > 500_000: + msg = "Offer has unreasonable price higher than 500_000, price={0}, url={1} ".format(price.amount_text, item["offer_url"]) + logger.info(msg) + raise DropItem(msg) + + item['price'] = price return item @@ -42,26 +45,26 @@ class FilterSearchAndCharterOffers(object): charter_offer_terms = ["charter", "for rent"] def process_item(self, item, _): - logger.debug("Filtering Searches for Aircraft Offers") for search_offer_term in self.search_offer_terms: if search_offer_term in item["title"].lower(): - logger.info("dropping search offer: " + str(item["title"])) + logger.info("Dropping search offer, title='%s' url=%s", item["title"], item["offer_url"]) raise DropItem("Dropping Search offer") for charter_term in self.charter_offer_terms: if charter_term in item["title"].lower(): - logger.info("dropping charter offer: " + str(item["title"])) + logger.info("Dropping charter offer, title='%s' url=%s", item["title"], item["offer_url"]) raise DropItem("Dropping Charter Offer") return item class StoragePipeline(object): - def process_item(self, item, spider): - spider.crawler.stats.inc_value('items_stored') - logger.debug("Storing offer %s", str(item)) - logging.debug("Fetching currency code") + def process_item(self, item, spider=None): + if spider is not None: + spider.crawler.stats.inc_value('items_stored') + currency_code = get_currency_code(item["price"]) - logging.debug("currency code is {0}".format(currency_code)) + logger.debug("Storing offer title='%s', url=%s, currency_code=%s", item["title"], item["offer_url"], currency_code) + db.store_offer(db.AircraftOffer( title=item["title"], creation_datetime=datetime.datetime.now(), @@ -71,7 +74,7 @@ def process_item(self, item, spider): currency_code=currency_code, location=item["location"], offer_url=item["offer_url"], - spider=spider.name, + spider=spider.name if spider is not None else "unknown", hours=item["hours"], starts=item["starts"], detail_text=item["detail_text"], diff --git a/backend/run_tests.sh b/backend/run_tests.sh index bc16678..03aa415 100755 --- a/backend/run_tests.sh +++ b/backend/run_tests.sh @@ -1,10 +1,7 @@ export PYTHONPATH=$PYTHONPATH':./' -python3 -m unittest -f +python3 -m unittest -v if [[ $? -ne 0 ]]; then - docker rm -f test-db exit 1 -else - docker rm -f test-db fi \ No newline at end of file diff --git a/backend/settings.py b/backend/settings.py index 81c2517..40ca5a8 100644 --- a/backend/settings.py +++ b/backend/settings.py @@ -15,8 +15,8 @@ # scrapy pipeline components config, do not delete this ITEM_PIPELINES = { - 'pipelines.DuplicateDetection': 100, - 'pipelines.FilterUnreasonablePrices': 200, - 'pipelines.FilterSearchAndCharterOffers': 300, + 'pipelines.PriceParser': 100, + 'pipelines.FilterSearchAndCharterOffers': 200, + 'pipelines.DuplicateDetection': 300, 'pipelines.StoragePipeline': 400, } diff --git a/backend/spiders/FlugzeugMarktDeSpider.py b/backend/spiders/FlugzeugMarktDeSpider.py index f8f8efc..b61a142 100644 --- a/backend/spiders/FlugzeugMarktDeSpider.py +++ b/backend/spiders/FlugzeugMarktDeSpider.py @@ -1,7 +1,6 @@ import scrapy import datetime import re -from price_parser import Price from my_logging import * BASE_URL = "https://www.flugzeugmarkt.de/" @@ -48,7 +47,6 @@ def _extract_number_from_cell(self, name, response): def parse_detail_page(self, response): date = response.xpath("//tr/td[contains(.,'Eingestellt')]/../td[@class='value']/text()").extract_first() price_str = response.css('div.buy-it-now div.price::text').extract_first() - parsed_price = Price.fromstring(price_str) location = response.xpath("//tr/td[contains(.,'Standort')]/../td[@class='value']/text()").extract_first() hours = self._extract_number_from_cell("Gesamtzeit", response) starts = self._extract_number_from_cell("Landungen", response) @@ -63,10 +61,10 @@ def parse_detail_page(self, response): self.logger.info( "Couldn't determine aircraft type for offer: {0} with url: {1}".format(title, response.url)) self.logger.debug("yielding title %s", title) - yield { + yield { # TODO introduce data class 'title': title, 'date': datetime.datetime.strptime(date, "%d.%m.%Y").date(), - 'price': parsed_price, + 'raw_price': price_str, 'offer_url': response.url, 'location': location, 'aircraft_type': aircraft_type, diff --git a/backend/spiders/PlaneCheckComSpider.py b/backend/spiders/PlaneCheckComSpider.py index 57282a5..090cbce 100644 --- a/backend/spiders/PlaneCheckComSpider.py +++ b/backend/spiders/PlaneCheckComSpider.py @@ -1,6 +1,5 @@ import scrapy import datetime -from price_parser import Price from my_logging import * BASE_URL = "https://www.planecheck.com/" @@ -38,14 +37,13 @@ def parse_detail_page(self, response): logging.info("price with VAT should be: {0}".format(price_str)) else: price_str = response.xpath("//td[contains(.,'Price')]/../td[2][contains(.,',')]/b/text()").extract_first() - parsed_price = Price.fromstring(price_str) location = response.xpath("//td/b[contains(.,'Country')]/../../td[2]/text()").extract_first() - yield { + yield { # TODO introduce data class 'offer_url': response.url, 'title': title, 'aircraft_type': 'airplane', 'date': datetime.datetime.strptime(date, "%d-%m-%Y").date(), # last updated value - 'price': parsed_price, + 'raw_price': price_str, 'detail_text': response.text, 'location': location, # TODO currently only the country is extracted, 'hours': -1, diff --git a/backend/spiders/SoaringDeSpider.py b/backend/spiders/SoaringDeSpider.py index cfa1082..cb4dc40 100644 --- a/backend/spiders/SoaringDeSpider.py +++ b/backend/spiders/SoaringDeSpider.py @@ -2,7 +2,6 @@ from scrapy.spidermiddlewares.httperror import HttpError import datetime import re -from price_parser import Price from my_logging import * GLIDER_OFFERS_URL = "https://soaring.de/osclass/index.php?page=search&sCategory=118" @@ -52,7 +51,6 @@ def errback(self, failure): def parse_detail_page(self, response): price_str = response.css('#item-content .item-header li::text').extract()[1] - parsed_price = Price.fromstring(price_str) date_str = response.css('#item-content .item-header li::text').extract()[3] date_str = date_str.replace('Veröffentlichungsdatum:', '').strip() date_obj = datetime.datetime.strptime(date_str, "%d/%m/%Y").date() @@ -68,9 +66,9 @@ def parse_detail_page(self, response): if 'Gesamtstarts' in aircraft_details: starts = self._extract_first_number(aircraft_details) - yield { + yield { # TODO introduce data class 'title': response.css('#item-content .title strong::text').extract_first(), - 'price': parsed_price, + 'raw_price': price_str, 'offer_url': response.url, 'location': location, 'date': date_obj, diff --git a/backend/tests/requirements.txt b/backend/tests/requirements.txt index f87e95c..fa1c1a0 100644 --- a/backend/tests/requirements.txt +++ b/backend/tests/requirements.txt @@ -1 +1,4 @@ -ddt==1.7.2 \ No newline at end of file +ddt==1.7.2 +psycopg==3.2.1 +pytest==8.3.2 +testcontainers[postgres]==4.7.2 diff --git a/backend/tests/test_FlugzeugMarktDeSpider.py b/backend/tests/test_FlugzeugMarktDeSpider.py index 3090cd7..f8b2ac2 100644 --- a/backend/tests/test_FlugzeugMarktDeSpider.py +++ b/backend/tests/test_FlugzeugMarktDeSpider.py @@ -14,7 +14,7 @@ def test_parse_detail_page(self): fake_response_from_file('samples/flugzeugmarkt_de_offer.html'))) self.assertIsNotNone(item["title"]) self.assertEqual(item["date"], datetime.datetime.strptime("08.10.2019", "%d.%m.%Y").date()) - self.assertIsNotNone(item["price"]) + self.assertEqual("250.000 $", item["raw_price"]) self.assertEqual(1492, item["hours"]) self.assertTrue("IFR Approved" in item["detail_text"]) self.assertEqual("airplane", item["aircraft_type"]) diff --git a/backend/tests/test_PlaneCheckComSpider.py b/backend/tests/test_PlaneCheckComSpider.py index 9d9efd7..ae82134 100644 --- a/backend/tests/test_PlaneCheckComSpider.py +++ b/backend/tests/test_PlaneCheckComSpider.py @@ -22,9 +22,7 @@ def test_parse_detail_page(self): self.assertIsNotNone(item["title"]) self.assertEqual("Beech 95 Travel Air D95A", item["title"]) self.assertEqual(item["date"], datetime.datetime.strptime("31.12.2019", "%d.%m.%Y").date()) - self.assertIsNotNone(item["price"]) - self.assertEqual("92,500", item["price"].amount_text) - self.assertEqual("€", item["price"].currency) + self.assertEqual(item["raw_price"], "€\xa092,500") self.assertTrue(len(item["detail_text"]) > 0) self.assertTrue("Switzerland" in item["location"]) self.assertTrue(len(item["offer_url"]) > 0) @@ -34,5 +32,4 @@ def test_parse_detail_page_price_vat_included(self): fake_response_from_file('samples/planecheck_com_offer_piper.html', encoding='iso-8859-1'))) self.assertIsNotNone(item["title"]) self.assertEqual("Piper PA-34-220T Seneca V", item["title"]) - self.assertEqual(item["price"].currency, "$") - self.assertEqual(item["price"].amount_text, "743,750") + self.assertIsNotNone(item["raw_price"]) diff --git a/backend/tests/test_SoaringDeSpider.py b/backend/tests/test_SoaringDeSpider.py index c5e976f..a3a26da 100644 --- a/backend/tests/test_SoaringDeSpider.py +++ b/backend/tests/test_SoaringDeSpider.py @@ -16,7 +16,7 @@ def test_parse_detail_page(self): self.assertIsNotNone(item["date"]) self.assertTrue(isinstance(item["date"], date)) self.assertIsNotNone(item["title"]) - self.assertIsNotNone(item["price"]) + self.assertEqual("25.000,00 Euro € ", item["raw_price"]) self.assertIsNotNone(item["offer_url"]) self.assertIsNotNone(item["location"]) self.assertEqual(item["hours"], str(2522)) @@ -35,8 +35,7 @@ def test_parse_detail_page_for_tmg(self): self.assertEqual("Dimona H36", item["title"]) self.assertEqual("2880", item["hours"]) self.assertEqual("5672", item["starts"]) - self.assertEqual("22.000,00", item["price"].amount_text) - self.assertEqual("€", item["price"].currency) + self.assertEqual("22.000,00 Euro €\n ", item["raw_price"]) def test_parse_detail_page_for_ls3(self): item = next(self.spider.parse_detail_page( diff --git a/backend/tests/test__testcontainers_setup.py b/backend/tests/test__testcontainers_setup.py new file mode 100644 index 0000000..2696ace --- /dev/null +++ b/backend/tests/test__testcontainers_setup.py @@ -0,0 +1,17 @@ +# This module's name (double __) makes python unittest runner to execute this BEFORE db module (or any other modules depending on 'db') otherwise db module +# will NOT connect properly to Postgres instance initialized here +import os + +from pathlib import Path +from testcontainers.postgres import PostgresContainer + +db_migration_scripts_location = Path(__file__).parent.parent.parent / "db" / "migrations" +postgres = PostgresContainer("postgres:16.3-alpine") +postgres.with_volume_mapping(host=str(db_migration_scripts_location), container=f"/docker-entrypoint-initdb.d/") +postgres.start() + +os.environ["DB_HOST"] = postgres.get_container_host_ip() +os.environ["DB_PORT"] = postgres.get_exposed_port(5432) +os.environ["DB_NAME"] = postgres.dbname +os.environ["DB_USER"] = postgres.username +os.environ["DB_PW"] = postgres.password diff --git a/backend/tests/test_db.py b/backend/tests/test_db.py new file mode 100644 index 0000000..2308c4f --- /dev/null +++ b/backend/tests/test_db.py @@ -0,0 +1,53 @@ +import unittest +from ddt import ddt, data + +import tests.test__testcontainers_setup +import db + +@ddt +class DbTest(unittest.TestCase): + + def setUp(self): + db.truncate_offers() + + def test_should_store_and_fetch_offer(self): + # given + sample_offer = buildOfferWithUrl("https://offers.com/1") + + # when + db.store_offer(sample_offer) + + # then + all_gliders_in_db = db.get_offers_dict(aircraft_type="glider") + self.assertEqual(len(all_gliders_in_db), 1) + self.assertEqual(all_gliders_in_db[0]["title"], "Glider A") + + def test_should_check_url_exists(self): + # given offer exists in db + db.store_offer(buildOfferWithUrl("https://offers.com/1")) + + # when + url_exists = db.offer_url_exists("https://offers.com/1") + + # then + self.assertTrue(url_exists) + self.assertFalse(db.offer_url_exists("https://offers.com/2")) + + +def buildOfferWithUrl(url): + return db.AircraftOffer( + title="Glider A", + creation_datetime="2024-07-30 18:45:42.571 +0200", + date="2024-07-27", + price=29500.00, + currency="€", + currency_code="EUR", + offer_url=url, + spider="segelflug_de_kleinanzeigen", + detail_text="does not matter to much", + aircraft_type="glider" + ) + + +if __name__ == '__main__': + unittest.main() diff --git a/backend/tests/test_pipelines.py b/backend/tests/test_pipelines.py index 4e5937a..a7cf8cc 100644 --- a/backend/tests/test_pipelines.py +++ b/backend/tests/test_pipelines.py @@ -1,11 +1,80 @@ import unittest -import pipelines +from decimal import Decimal + +from price_parser import Price from scrapy.exceptions import DropItem from ddt import ddt, data +import tests.test__testcontainers_setup +import pipelines +import db + +@ddt +class DuplicateDetectionTest(unittest.TestCase): + + def setUp(self): + db.truncate_offers() + + self.sample_offer = buildOfferWithUrl("https://offers.com/1") + self.detection = pipelines.DuplicateDetection() + + def test_new_offer_is_not_duplicate(self): + # given offer in DB with different url + db.store_offer(buildOfferWithUrl("https://offers.com/2")) + + # when & then + try: + self.detection.process_item({"offer_url": "https://offers.com/1"}, None) + except DropItem: + self.fail("DuplicateDetection unexpectedly dropped new item!") + + def test_existing_offer_is_duplicate(self): + # given offer in DB with same url + db.store_offer(self.sample_offer) + + # when & then + self.assertRaises(DropItem, self.detection.process_item, {"offer_url": "https://offers.com/1"}, None) + +@ddt +class PriceParserTest(unittest.TestCase): + def setUp(self): + self.sample_offer = buildOfferWithUrl("https://offers.com/1") + self.detection = pipelines.PriceParser() + + @data( + ({"raw_price": "2,01 Euro €", "offer_url": "https://offers.com/1"}, 2.01), + ({"raw_price": "1.234,00 Euro €", "offer_url": "https://offers.com/2"}, 1_234.00), + ({"raw_price": "123.456,00 Euro €", "offer_url": "https://offers.com/3"}, 123_456.00), + ) + def test_parse_valid_prices(self, testInput): + offer_with_valid_price = testInput[0] + expected_price = Decimal(testInput[1]) + CENTS = Decimal(10) ** -2 + try: + self.detection.process_item(offer_with_valid_price, None) + except DropItem: + self.fail("PriceParser unexpectedly dropped offer with valid price!") + + self.assertEqual(offer_with_valid_price["price"].amount.quantize(CENTS), expected_price.quantize(CENTS)) + self.assertEqual(offer_with_valid_price["price"].currency, "€") + + @data( + {"raw_price": "", "offer_url": "https://offers.com/1"}, + {"raw_price": "Ask for price", "offer_url": "https://offers.com/2"} + ) + def test_should_drop_if_price_is_missing(self, offer_with_invalid_price): + self.assertRaises(DropItem, self.detection.process_item, offer_with_invalid_price, None) + + @data( + {"raw_price": "0 Euro €", "offer_url": "https://offers.com/1"}, + {"raw_price": "0,89 Euro €", "offer_url": "https://offers.com/2"}, # smaller than 1 + {"raw_price": "500.001,00 Euro €", "offer_url": "https://offers.com/3"}, # huge amount + ) + def test_should_drop_if_price_is_unreasonable(self, offer_with_unreasonable_price): + self.assertRaises(DropItem, self.detection.process_item, offer_with_unreasonable_price, None) @ddt -class PipelineProcessingTest(unittest.TestCase): +class FilterSearchAndCharterOffersTest(unittest.TestCase): @data( "Suche Stemme S12", @@ -13,7 +82,7 @@ class PipelineProcessingTest(unittest.TestCase): "Discus CS - SUCHE" ) def test_search_offers_are_dropped(self, offer_title): - offer = {"title": offer_title} + offer = {"title": offer_title, "offer_url": "https://offers.com/1"} offer_filter = pipelines.FilterSearchAndCharterOffers() self.assertRaises(DropItem, offer_filter.process_item, offer, None) @@ -24,7 +93,7 @@ def test_search_offers_are_dropped(self, offer_title): "ASG29E with 15m and 18m wingtips for rent", ) def test_charter_offers_are_dropped(self, offer_title): - offer = {"title": offer_title} + offer = {"title": offer_title, "offer_url": "https://offers.com/1"} offer_filter = pipelines.FilterSearchAndCharterOffers() self.assertRaises(DropItem, offer_filter.process_item, offer, None) @@ -36,3 +105,50 @@ def test_regular_offers_are_not_dropped(self, offer_title): offer = {"title": offer_title} offer_filter = pipelines.FilterSearchAndCharterOffers() offer_filter.process_item(offer, None) + + +@ddt +class StoragePipelineTest(unittest.TestCase): + + def setUp(self): + db.truncate_offers() + self.storage = pipelines.StoragePipeline() + + def test_should_store_offer(self): + # given + sample_raw_offer = { + "title": "Glider A", + "price": Price.fromstring("123.456,00 Euro €"), + "offer_url": "https://offers.com/1", + "location": "Moon", + "date": "2024-07-27", + "hours": 1000, + "starts": 300, + "detail_text": "does not matter that much here", + "aircraft_type": "glider", + } + # when + self.storage.process_item(sample_raw_offer, None) + + # then + all_gliders_in_db = db.get_offers_dict(aircraft_type="glider") + self.assertEqual(len(all_gliders_in_db), 1) + self.assertEqual(all_gliders_in_db[0]["title"], "Glider A") + self.assertTrue(db.offer_url_exists("https://offers.com/1")) + +def buildOfferWithUrl(url): + return db.AircraftOffer( + title="Glider A", + creation_datetime="2024-07-30 18:45:42.571 +0200", + date="2024-07-27", + price=29500.00, + currency="€", + currency_code="EUR", + offer_url=url, + spider="segelflug_de_kleinanzeigen", + detail_text="does not matter to much", + aircraft_type="glider" + ) + +if __name__ == '__main__': + unittest.main() \ No newline at end of file diff --git a/db/migrations/V1.1__aero_offers_created_idx.sql b/db/migrations/V1.1__aero_offers_created_idx.sql deleted file mode 100644 index 8d41bc1..0000000 --- a/db/migrations/V1.1__aero_offers_created_idx.sql +++ /dev/null @@ -1 +0,0 @@ -CREATE INDEX aircraft_offer_creation_datetime_idx ON aircraft_offer (creation_datetime); diff --git a/db/migrations/V2__aero_offers_date_idx.sql b/db/migrations/V2__aero_offers_date_idx.sql new file mode 100644 index 0000000..c1e99fd --- /dev/null +++ b/db/migrations/V2__aero_offers_date_idx.sql @@ -0,0 +1 @@ +CREATE INDEX IF NOT EXISTS aircraft_offer_date_idx ON aircraft_offer (date DESC); diff --git a/db/migrations/disclaimer.txt b/db/migrations/disclaimer.txt new file mode 100644 index 0000000..74b0cee --- /dev/null +++ b/db/migrations/disclaimer.txt @@ -0,0 +1 @@ +Use only major versions (V1, V4, V99 etc), don't use decimals (V1.1) as it will break integration tests using Postgres via TestContainers \ No newline at end of file diff --git a/ui/.env.development b/ui/.env.development index 4590dca..cebda21 100644 --- a/ui/.env.development +++ b/ui/.env.development @@ -1 +1 @@ -VITE_API_URI=http://localhost:8081/api \ No newline at end of file +VITE_API_URI=http://localhost:8081/ \ No newline at end of file diff --git a/ui/.env.production b/ui/.env.production index 9c345ae..2a19752 100644 --- a/ui/.env.production +++ b/ui/.env.production @@ -1 +1 @@ -VITE_API_URI=/api \ No newline at end of file +VITE_API_URI= \ No newline at end of file diff --git a/ui/src/components/AircraftModelFilter.vue b/ui/src/components/AircraftModelFilter.vue index d06ace1..0c021b2 100644 --- a/ui/src/components/AircraftModelFilter.vue +++ b/ui/src/components/AircraftModelFilter.vue @@ -101,7 +101,7 @@ export default { return } - axios.get(`/models`).then((response) => { + axios.get(`/api/models`).then((response) => { this.all_aircraft_types = response.data this.parseAndApplyRoute() this.updateAircraftTypes() diff --git a/ui/src/views/OfferDetails.vue b/ui/src/views/OfferDetails.vue index f58a6d5..3b81273 100644 --- a/ui/src/views/OfferDetails.vue +++ b/ui/src/views/OfferDetails.vue @@ -98,7 +98,7 @@ export default { fetchData() { this.chartData.series = [[]] - axios.get(`/model/${this.manufacturer}/${this.model}`).then((response) => { + axios.get(`/api/offers/${this.manufacturer}/${this.model}`).then((response) => { this.manufacturer_website = response.data.manufacturer_website this.offers = response.data.offers if (this.offers.length === 0) { diff --git a/ui/src/views/OffersList.vue b/ui/src/views/OffersList.vue index f5e31f9..42d960b 100644 --- a/ui/src/views/OffersList.vue +++ b/ui/src/views/OffersList.vue @@ -38,7 +38,7 @@ export default { methods: { fetchData() { axios - .get(`/offers`, { + .get(`/api/offers`, { params: { aircraft_type: this.aircraftType, limit: this.limit,