diff --git a/fimficstats/fimfic-stats.ts b/fimficstats/fimfic-stats.ts index 38e6c4f..b00456d 100644 --- a/fimficstats/fimfic-stats.ts +++ b/fimficstats/fimfic-stats.ts @@ -1,13 +1,13 @@ #!/usr/bin/env bun import "@total-typescript/ts-reset"; +import * as cheerio from "cheerio"; import * as plib from "./lib.ts"; import fs from "fs"; await mane(); async function mane() { - // API Bearer token is required to scrape the data. const access_token = process.argv[2]; const api_domain = "https://www.fimfiction.net/api/v2/stories"; @@ -15,7 +15,6 @@ async function mane() { // Loop over IDs to scrape data. for (let id = 551751; id < 552652; id++) { - // Set API and HTML status to 200. let api_status = 200; let html_status = 200; @@ -41,7 +40,7 @@ async function mane() { id = id - 1; continue; } - + // Get html of the stats page. const stats_html = await fetch(`${stats_domain}/${id}`).then((response) => { html_status = response.status; @@ -55,28 +54,23 @@ async function mane() { if (api_status === 404 && html_status === 404) { console.warn("deleted story"); continue; - } - else if (api_status === 404 && html_status === 200) { + } else if (api_status === 404 && html_status === 200) { console.warn("unpublished story"); // TODO: Add ID as unpublished and continue without scraping. continue; } + // Load the HTML with Cheerio. + const document = cheerio.load(stats_html); + // Format the historical data into JSON. - // Can probably replace this section with an HTML parser, will look into it soon. - const stats = stats_html - .split("\n") - .filter((l) => - l.startsWith('
$/, "") - .replaceAll(""", '"'); - - console.log(api_json, id, JSON.parse(stats)); + const data = document(".layout-two-columns[data-data]").attr("data-data"); + + // Log variables to console for testing. + console.log(id, api_json); + console.dir(JSON.parse(data!), { depth: null }); + + // Sleep for 1 second. sleep(1000); } } diff --git a/fimficstats/package.json b/fimficstats/package.json index 097f4a4..8e0aafa 100644 --- a/fimficstats/package.json +++ b/fimficstats/package.json @@ -30,6 +30,7 @@ "typescript": "^5.2.2" }, "dependencies": { + "cheerio": "^1.0.0-rc.12", "node-html-markdown": "^1.3.0" } }