From 370164200cd000cf2bf7585c3121df6f9537069d Mon Sep 17 00:00:00 2001 From: Jakub Czajkowski Date: Fri, 10 Jan 2025 08:22:40 +0100 Subject: [PATCH 1/3] feat: adjust last update time logic while menu is offline --- app/controllers/meals_controller.ts | 1 + scripts/menu_scrapper.ts | 17 ++++++++++------- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/app/controllers/meals_controller.ts b/app/controllers/meals_controller.ts index b1a5df6..fe81211 100644 --- a/app/controllers/meals_controller.ts +++ b/app/controllers/meals_controller.ts @@ -20,6 +20,7 @@ export default class MealsController { const lastHash = await WebsiteHash.query() .orderBy("updatedAt", "desc") .first(); + //this case is kinda weird now, but it is very rare if (lastHash === null) { return response .status(200) diff --git a/scripts/menu_scrapper.ts b/scripts/menu_scrapper.ts index 696e016..6b6b360 100644 --- a/scripts/menu_scrapper.ts +++ b/scripts/menu_scrapper.ts @@ -14,6 +14,10 @@ export const url = "https://sks.pwr.edu.pl/menu/"; export async function runScrapper() { const trx = await db.transaction(); + const response = await fetch(url); + const data = await response.text(); + const $ = cheerio.load(data); + try { const currentHash = await cacheMenu(); const storedHash = await WebsiteHash.query() @@ -21,7 +25,9 @@ export async function runScrapper() { .first(); if (storedHash !== null) { - await storedHash.merge({ updatedAt: DateTime.now() }).save(); + if ($("#menu_table").text().trim() === "") { + await storedHash.merge({ updatedAt: DateTime.now() }).save(); + } logger.info( "Hash already exists in the database. Not proceeding with scraping.", ); @@ -33,7 +39,7 @@ export async function runScrapper() { { hash: currentHash }, { client: trx }, ); - const meals = await scrapeMenu(); + const meals = await scrapeMenu(data); for (const meal of meals) { if (meal.price === 0) { @@ -64,10 +70,8 @@ export async function runScrapper() { } } -export async function scrapeMenu() { - const response = await fetch(url); - const data = await response.text(); - const $ = cheerio.load(data); +export async function scrapeMenu(html: string) { + const $ = cheerio.load(html); return $(".category") .map((_, category) => { @@ -106,7 +110,6 @@ export async function scrapeMenu() { export async function cacheMenu() { const response = await fetch(url); const data = await response.text(); - console.log(data); return createHash("sha256").update(data).digest("hex"); } From 4ab9350ac71b83b3b422eaa6d7a7911b3bde8de4 Mon Sep 17 00:00:00 2001 From: Jakub Czajkowski Date: Fri, 10 Jan 2025 08:23:46 +0100 Subject: [PATCH 2/3] test: update tests to match the new scrapeMenu implementation --- tests/unit/menuScrapper/scrape_menu.spec.ts | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/unit/menuScrapper/scrape_menu.spec.ts b/tests/unit/menuScrapper/scrape_menu.spec.ts index 1e0c996..2b2a41b 100644 --- a/tests/unit/menuScrapper/scrape_menu.spec.ts +++ b/tests/unit/menuScrapper/scrape_menu.spec.ts @@ -1,20 +1,20 @@ -import nock from "nock"; +import fs from "node:fs"; +import path from "node:path"; import { test } from "@japa/runner"; import { expectedResponse } from "#tests/fixtures/parsed_menu_expected_response"; -import { scrapeMenu, url } from "../../../scripts/menu_scrapper.js"; +import { scrapeMenu } from "../../../scripts/menu_scrapper.js"; test.group("Menu scrapper scrape menu", () => { test("should parse the external menu response", async ({ assert }) => { - nock(url) - .get("/") - .replyWithFile(200, "./tests/fixtures/external_menu_response.html", { - "Content-Type": "text/html; charset=UTF-8", - }); + const htmlResponse = fs.readFileSync( + path.resolve("./tests/fixtures/external_menu_response.html"), + "utf8", + ); - const response = await scrapeMenu(); + const response = await scrapeMenu(htmlResponse); assert.deepEqual(response, expectedResponse); }); }); From d9ff0e87c8c257559ea4c877d927bc7054df86dd Mon Sep 17 00:00:00 2001 From: Jakub Czajkowski Date: Mon, 13 Jan 2025 00:26:57 +0100 Subject: [PATCH 3/3] fix: fix faulty logic --- scripts/menu_scrapper.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/menu_scrapper.ts b/scripts/menu_scrapper.ts index 6b6b360..aa4ea42 100644 --- a/scripts/menu_scrapper.ts +++ b/scripts/menu_scrapper.ts @@ -25,7 +25,7 @@ export async function runScrapper() { .first(); if (storedHash !== null) { - if ($("#menu_table").text().trim() === "") { + if ($("#menu_table").text().trim() !== "") { await storedHash.merge({ updatedAt: DateTime.now() }).save(); } logger.info(