Skip to content

Commit

Permalink
Enable FlugzeugMarktDeSpider (#17)
Browse files (browse the repository at this point in the history)
Also: optimised the duplicate check (whether an offer is already stored in
the db) by using an EXISTS query (~0.1 ms) instead of a plain SELECT (~4 ms)
  • Loading branch information
lwitkowski authored Aug 2, 2024
1 parent c2c92b1 commit 8af0d85
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 7 deletions.
8 changes: 3 additions & 5 deletions backend/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,13 +92,11 @@ def update_exchange_rate(exchange_rate):


def has_offer_url(offer_url):
logger.debug("Starting new database connection")
try:
s = select(AircraftOffer.offer_url).where(AircraftOffer.offer_url == offer_url)
query = select(select(AircraftOffer.offer_url).where(AircraftOffer.offer_url == offer_url).exists())
conn = engine.connect()
if conn.execute(s).fetchone():
return True
return False
result = conn.execute(query).fetchone()
return result is not None and result[0] == True
except Exception as e:
logger.error(e)
logger.error("database error, assuming we don't have this offer already")
Expand Down
2 changes: 1 addition & 1 deletion backend/job_fetch_offers.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

spiders = {
SoaringDeSpider.SoaringDeSpider: None,
#FlugzeugMarktDeSpider.FlugzeugMarktDeSpider: None,
FlugzeugMarktDeSpider.FlugzeugMarktDeSpider: None,
#PlaneCheckComSpider.PlaneCheckComSpider: None
}
for spider_cls in spiders.keys():
Expand Down
13 changes: 12 additions & 1 deletion backend/spiders/FlugzeugMarktDeSpider.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,18 @@
class FlugzeugMarktDeSpider(scrapy.Spider):
name = "flugzeugmarkt_de"
logger = logging.getLogger(name)
start_urls = [AIRCRAFT_OFFERS_URL]
start_urls = [
AIRCRAFT_OFFERS_URL,
AIRCRAFT_OFFERS_URL + "/seite-2.html",
AIRCRAFT_OFFERS_URL + "/seite-3.html",
AIRCRAFT_OFFERS_URL + "/seite-4.html",
AIRCRAFT_OFFERS_URL + "/seite-5.html",
AIRCRAFT_OFFERS_URL + "/seite-6.html",
AIRCRAFT_OFFERS_URL + "/seite-7.html",
AIRCRAFT_OFFERS_URL + "/seite-8.html",
AIRCRAFT_OFFERS_URL + "/seite-9.html",
AIRCRAFT_OFFERS_URL + "/seite-10.html"
]

aircraft_type_mapping = {
"Motorsegler": "tmg",
Expand Down

0 comments on commit 8af0d85

Please sign in to comment.