diff --git a/README.md b/README.md
index d3349e4..070671a 100644
--- a/README.md
+++ b/README.md
@@ -1,10 +1,11 @@
 # Aero-offers
 
-This project aims at reviving www.aero-offers.com - invaluable source of price trends for gliders and other aircrafts.
+This project aims to revive www.aero-offers.com, an invaluable source of price trends for gliders and other aircraft, originally developed and maintained by @rthaenert.
 
 ## Development
 
-[![Continuous Deployment](https://github.com/lwitkowski/aero-offers/actions/workflows/cd.yaml/badge.svg)](https://github.com/lwitkowski/aero-offers/actions/workflows/cd.yaml)
+[![CD - UI](https://github.com/lwitkowski/aero-offers/actions/workflows/cd-ui.yaml/badge.svg)](https://github.com/lwitkowski/aero-offers/actions/workflows/cd-ui.yaml)
+[![CD - Backend (api, jobs)](https://github.com/lwitkowski/aero-offers/actions/workflows/cd-backend.yaml/badge.svg)](https://github.com/lwitkowski/aero-offers/actions/workflows/cd-backend.yaml)
 
 ### Project structure (building blocks / deployment units)
 - `frontend` - vue.js application deployed as dockerized static web app served by nginx
@@ -16,16 +17,18 @@ This project aims at reviving www.aero-offers.com - invaluable source of price t
 - `db` - PostgreSQL 15 database with DDL scripts managed by Flyway
 
 ### TODO
-- use Azure secrets for db credentials
-- managed db with persistent storage (it's running in ephemeral container atm)
-- fix spiders/crawlers
-- setup cron triggers (Azure Functions?)
-- infra as code (biceps or terraform)
-- document infra and env topology
-- human readable domain (aero-offers.com?)
-- fix aircraft type dropdown
-- fix css
-- update/simplify legal subpage
+- [x] deploy working UI, API and DB to Azure
+- [x] fix segelflug spider/crawler
+- [ ] managed DB with persistent storage (currently running in an ephemeral container)
+- [ ] fix the remaining spiders/crawlers
+- [ ] use Azure secrets for DB credentials
+- [ ] set up cron triggers for the crawlers, the reclassifier and the FX rates updater (Azure Functions?)
+- [ ] infra as code (Bicep or Terraform)
+- [ ] document infra and env topology
+- [ ] human-readable domain (aero-offers.com?)
+- [ ] fix aircraft type dropdown
+- [ ] fix & polish CSS in the UI
+- [ ] update/simplify legal subpage
 
 ### Running locally without Python nor NodeJS
 `docker compose up --build` - starts postgres, python backend and frontend apps (http://localhost:8080/)
diff --git a/backend/job_fetch_offers.py b/backend/job_fetch_offers.py
index 679a140..5ba93e0 100644
--- a/backend/job_fetch_offers.py
+++ b/backend/job_fetch_offers.py
@@ -3,42 +3,43 @@
 from twisted.internet import reactor
 from scrapy.crawler import CrawlerRunner
 
-import db
 import pprint
 from my_logging import *
 from spiders import SoaringDeSpider, FlugzeugMarktDeSpider, PlaneCheckComSpider
 from mailer import send_mail
 
 logger = logging.getLogger("fetch_offers")
-try:
-    settings = get_project_settings()
-    runner = CrawlerRunner(settings)
-
-    spiders = {
-        SoaringDeSpider.SoaringDeSpider: None,
-        #FlugzeugMarktDeSpider.FlugzeugMarktDeSpider: None,
-        #PlaneCheckComSpider.PlaneCheckComSpider: None
-    }
-    for spider_cls in spiders.keys():
-        crawler = runner.create_crawler(spider_cls)
-        spiders[spider_cls] = crawler
-        runner.crawl(crawler)
-
-    d = runner.join()
-    d.addBoth(lambda _: reactor.stop())
-    reactor.run() # the script will block here until all crawling jobs are finished
-
-    stats_per_spider = {}
-
-    for spider_cls, crawler in spiders.items():
-        logger.debug("Fetching stats for spider: %s", spider_cls)
-        stats_per_spider[spider_cls.name] = crawler.stats.get_stats()
-
-    msg = "Crawling offers completed at {0} \n\n {1} \n".format(str(datetime.now()), pprint.pformat(stats_per_spider))
-
-    logger.info(msg)
-    send_mail(msg)
-except Exception as e:
-    msg = "Error connecting to the database: {0}".format(repr(e))
-    logger.error(msg)
-    send_mail(msg)
+
+if __name__ == '__main__':
+    try:
+        settings = get_project_settings()
+        runner = CrawlerRunner(settings)
+
+        spiders = {
+            SoaringDeSpider.SoaringDeSpider: None,
+            # FlugzeugMarktDeSpider.FlugzeugMarktDeSpider: None,
+            # PlaneCheckComSpider.PlaneCheckComSpider: None
+        }
+        for spider_cls in spiders.keys():
+            crawler = runner.create_crawler(spider_cls)
+            spiders[spider_cls] = crawler
+            runner.crawl(crawler)
+
+        d = runner.join()
+        d.addBoth(lambda _: reactor.stop())
+        reactor.run()  # the script will block here until all crawling jobs are finished
+
+        stats_per_spider = {}
+
+        for spider_cls, crawler in spiders.items():
+            logger.debug("Fetching stats for spider: %s", spider_cls)
+            stats_per_spider[spider_cls.name] = crawler.stats.get_stats()
+
+        msg = "Crawling offers completed at {0} \n\n {1} \n".format(str(datetime.now()), pprint.pformat(stats_per_spider))
+
+        logger.info(msg)
+        send_mail(msg)
+    except Exception as e:
+        msg = "Error while crawling offers: {0}".format(repr(e))
+        logger.error(msg)
+        send_mail(msg)
diff --git a/backend/start_api.sh b/backend/start_api.sh
old mode 100644
new mode 100755
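
For context, the patched `job_fetch_offers.py` follows Scrapy's documented `CrawlerRunner` pattern: create one `Crawler` per spider class, schedule them all, stop the Twisted reactor once `runner.join()` resolves, then read each crawler's stats collector. Below is a minimal, self-contained sketch of that pattern; `DemoSpider` and its URL are hypothetical placeholders, not part of this repo, and the real job pulls its spiders from the `spiders` package and its settings via `get_project_settings()`.

```python
import pprint

import scrapy
from scrapy.crawler import CrawlerRunner
from scrapy.utils.log import configure_logging
from twisted.internet import reactor


class DemoSpider(scrapy.Spider):
    # Hypothetical stand-in for SoaringDeSpider and friends.
    name = "demo"
    start_urls = ["https://example.com"]

    def parse(self, response):
        yield {"title": response.css("title::text").get()}


if __name__ == "__main__":
    # The __main__ guard keeps the Twisted reactor from starting when this
    # module is merely imported - the point of the patch above.
    configure_logging()
    runner = CrawlerRunner()

    # One Crawler per spider class, so each keeps its own stats collector.
    crawlers = {cls: runner.create_crawler(cls) for cls in (DemoSpider,)}
    for crawler in crawlers.values():
        runner.crawl(crawler)

    d = runner.join()                    # Deferred fires when all crawls end
    d.addBoth(lambda _: reactor.stop())  # then shut the reactor down
    reactor.run()                        # blocks until reactor.stop()

    # Per-spider stats are available after the reactor has stopped.
    stats = {cls.name: c.stats.get_stats() for cls, c in crawlers.items()}
    print(pprint.pformat(stats))
```

The guard matters because a Twisted reactor can only be started once per process; starting it as an import side effect would hang or break any test or tool that imports the module, which is presumably why the patch introduces it.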