Skip to content

Commit

Permalink
Use Cheerio to parse html in fimfic stats
Browse files Browse the repository at this point in the history
And run prettier.
  • Loading branch information
SilkRose committed Mar 6, 2024
1 parent 3e4629f commit 6719af3
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 19 deletions.
32 changes: 13 additions & 19 deletions fimficstats/fimfic-stats.ts
Original file line number Diff line number Diff line change
@@ -1,21 +1,20 @@
#!/usr/bin/env bun

import "@total-typescript/ts-reset";
import * as cheerio from "cheerio";
import * as plib from "./lib.ts";
import fs from "fs";

// Entry point: run the scraper via top-level await. `mane` is a function
// declaration, so it is hoisted and can be called before its definition below.
await mane();

/**
 * Scrape Fimfiction story data for a hard-coded range of story IDs.
 *
 * For every ID in the range the loop fetches the story's stats page HTML,
 * uses the API and HTML status codes to skip stories that look deleted or
 * unpublished, and otherwise logs the API JSON plus the JSON parsed from the
 * stats page's `data-data` attribute.
 *
 * The bearer token is read from `process.argv[2]`.
 *
 * NOTE(review): this view is a rendered diff. Parts of the body are collapsed
 * (the "Expand All" marker lines), so the API request, where `api_status` and
 * `api_json` are assigned, and the condition guarding the retry are not
 * visible here.
 */
async function mane() {

// API Bearer token is required to scrape the data.
const access_token = process.argv[2];
const api_domain = "https://www.fimfiction.net/api/v2/stories";
const stats_domain = "https://www.fimfiction.net/story/stats";

// Loop over IDs to scrape data.
// The range is hard-coded: IDs 551751 through 552651 inclusive.
for (let id = 551751; id < 552652; id++) {

// Set API and HTML status to 200.
// These are `let` because they are overwritten later: html_status inside the
// stats-page fetch callback below; api_status presumably in the collapsed
// hunk (TODO confirm).
let api_status = 200;
let html_status = 200;
Expand All @@ -41,7 +40,7 @@ async function mane() {
// Step the counter back so that the loop's own id++ lands on the same ID
// again, i.e. this iteration is retried. The condition that triggers the
// retry is in the collapsed hunk above.
id = id - 1;
continue;
}

// Get html of the stats page.
// The response status is captured as a side effect so it can be compared
// against the API status further down.
const stats_html = await fetch(`${stats_domain}/${id}`).then((response) => {
html_status = response.status;
Expand All @@ -55,28 +54,23 @@ async function mane() {
// 404 from both the API and the stats page: log as deleted and skip this ID.
if (api_status === 404 && html_status === 404) {
console.warn("deleted story");
continue;
}
else if (api_status === 404 && html_status === 200) {
// 404 from the API but 200 from the stats page: log as unpublished and skip.
} else if (api_status === 404 && html_status === 200) {
console.warn("unpublished story");
// TODO: Add ID as unpublished and continue without scraping.
continue;
}

// Load the HTML with Cheerio.
// `document` is the query function returned by cheerio.load; it is called
// with a CSS selector further down.
const document = cheerio.load(stats_html);

// Format the historical data into JSON.
// Can probably replace this section with an HTML parser, will look into it soon.
const stats = stats_html
.split("\n")
.filter((l) =>
l.startsWith('<div class="layout-two-columns story-stats"'),
)[0]
.replace(
/^<div class="layout-two-columns story-stats" data-controller="story-stats" data-data="/,
"",
)
.replace(/">$/, "")
.replaceAll("&quot;", '"');

console.log(api_json, id, JSON.parse(stats));
// Select the element with class `layout-two-columns` that carries a
// `data-data` attribute and read that attribute's value.
const data = document(".layout-two-columns[data-data]").attr("data-data");

// Log variables to console for testing.
console.log(id, api_json);
// NOTE(review): `data!` only silences the type checker. If the selector
// matches nothing, `data` is undefined and JSON.parse will throw.
console.dir(JSON.parse(data!), { depth: null });

// Sleep for 1 second.
// NOTE(review): the call is not awaited and `sleep` is not defined in the
// visible part of the file. If it returns a Promise, the loop will not
// actually pause between requests — confirm against its definition.
sleep(1000);
}
}
Expand Down
1 change: 1 addition & 0 deletions fimficstats/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
"typescript": "^5.2.2"
},
"dependencies": {
"cheerio": "^1.0.0-rc.12",
"node-html-markdown": "^1.3.0"
}
}

0 comments on commit 6719af3

Please sign in to comment.