diff --git a/fimficstats/fimfic-stats.ts b/fimficstats/fimfic-stats.ts index 38e6c4f..b00456d 100644 --- a/fimficstats/fimfic-stats.ts +++ b/fimficstats/fimfic-stats.ts @@ -1,13 +1,13 @@ #!/usr/bin/env bun import "@total-typescript/ts-reset"; +import * as cheerio from "cheerio"; import * as plib from "./lib.ts"; import fs from "fs"; await mane(); async function mane() { - // API Bearer token is required to scrape the data. const access_token = process.argv[2]; const api_domain = "https://www.fimfiction.net/api/v2/stories"; @@ -15,7 +15,6 @@ async function mane() { // Loop over IDs to scrape data. for (let id = 551751; id < 552652; id++) { - // Set API and HTML status to 200. let api_status = 200; let html_status = 200; @@ -41,7 +40,7 @@ async function mane() { id = id - 1; continue; } - + // Get html of the stats page. const stats_html = await fetch(`${stats_domain}/${id}`).then((response) => { html_status = response.status; @@ -55,28 +54,23 @@ async function mane() { if (api_status === 404 && html_status === 404) { console.warn("deleted story"); continue; - } - else if (api_status === 404 && html_status === 200) { + } else if (api_status === 404 && html_status === 200) { console.warn("unpublished story"); // TODO: Add ID as unpublished and continue without scraping. continue; } + // Load the HTML with Cheerio. + const document = cheerio.load(stats_html); + // Format the historical data into JSON. - // Can probably replace this section with an HTML parser, will look into it soon. - const stats = stats_html - .split("\n") - .filter((l) => - l.startsWith('