-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
5 changed files
with
243 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
#!/usr/bin/env bun | ||
|
||
import "@total-typescript/ts-reset"; | ||
import * as plib from "./lib.ts"; | ||
import fs from "fs"; | ||
|
||
await mane(); | ||
|
||
/**
 * Scrapes FIMFiction story data for story IDs 551751 through 552651.
 *
 * For every ID the story is requested from the v2 API (authorised with the
 * Bearer token passed as the first command-line argument) and the payload is
 * logged. The story's stats page is then fetched and the JSON embedded in its
 * `data-data` attribute is decoded and logged together with the ID.
 *
 * @throws Error when no access token was supplied on the command line.
 */
async function mane() {
	// API Bearer token is required to scrape the data.
	const access_token = process.argv[2];
	if (!access_token) {
		throw new Error("Missing API access token: pass it as the first argument.");
	}
	const api_domain = "https://www.fimfiction.net/api/v2/stories";
	const stats_domain = "https://www.fimfiction.net/story/stats";
	// Marker that is only present when the stats page actually carries data.
	const stats_marker = 'data-controller="story-stats"';
	const stats_line_start = '<div class="layout-two-columns story-stats"';
	const stats_prefix =
		/^<div class="layout-two-columns story-stats" data-controller="story-stats" data-data="/;
	// Non-blocking delay so waiting does not spin the CPU.
	const pause = (ms: number) =>
		new Promise<void>((resolve) => setTimeout(resolve, ms));

	// Loop over IDs to scrape data.
	for (let id = 551751; id < 552652; id++) {
		// Get data from the API.
		const api_response = await fetch(`${api_domain}/${id}`, {
			method: "GET",
			headers: {
				Authorization: `Bearer ${access_token}`,
				"Content-Type": "application/json",
			},
		});
		const api_status = api_response.status;
		if (!api_response.ok) {
			console.error(`HTTP error! Status: ${api_status}`);
		}
		// Check for rate limiting before touching the body: back off, then
		// step the counter back so the loop increment retries the same ID.
		if (api_status === 429) {
			await pause(5000);
			id = id - 1;
			continue;
		}
		// An error response is not guaranteed to carry a JSON body; a parse
		// failure must not abort the whole scrape.
		try {
			const api_json: unknown = await api_response.json();
			console.log(api_json);
		} catch {
			console.error(`Could not parse API response for story ${id} as JSON.`);
		}
		// Get html of the stats page.
		const stats_response = await fetch(`${stats_domain}/${id}`);
		const stats_html = await stats_response.text();
		const has_stats = stats_html.includes(stats_marker);
		// Checks to see if the story is deleted or unpublished.
		if (api_status === 404) {
			console.warn(has_stats ? "unpublished story" : "deleted story");
		}
		if (!has_stats) continue;
		// Locate the single line that holds the embedded historical data.
		const stats_line = stats_html
			.split("\n")
			.find((line) => line.startsWith(stats_line_start));
		if (stats_line === undefined) {
			console.warn(`Stats markup not found for story ${id}.`);
			continue;
		}
		// Format the historical data into JSON: strip the surrounding tag and
		// undo the HTML escaping of the double quotes inside the attribute.
		const stats = stats_line
			.replace(stats_prefix, "")
			.replace(/">$/, "")
			.replaceAll("&quot;", '"');

		console.log(id, JSON.parse(stats));
		await pause(1000);
	}
}
|
||
/**
 * Synchronously blocks the calling thread for about `milliseconds` ms.
 *
 * The thread is parked with `Atomics.wait` on a private buffer instead of
 * spinning on `Date.now()`, so no CPU time is burned while waiting.
 *
 * @param milliseconds How long to block. Zero, negative and NaN values
 *   return immediately.
 */
function sleep(milliseconds: number) {
	// `Atomics.wait` treats a NaN timeout as "wait forever", so bail out early
	// for anything that is not a positive duration.
	if (!(milliseconds > 0)) return;
	// Nothing ever notifies this private cell, so the wait always ends by timeout.
	const cell = new Int32Array(new SharedArrayBuffer(4));
	Atomics.wait(cell, 0, 0, milliseconds);
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
import "@total-typescript/ts-reset"; | ||
import { execSync } from "child_process"; | ||
import path from "path"; | ||
import fs from "fs"; | ||
|
||
/**
 * Recursively removes every directory in `dirs`.
 *
 * Paths that do not exist are skipped silently.
 *
 * @param dirs Paths of the directories to delete.
 */
export function rmDirs(dirs: string[]) {
	for (const dir of dirs) {
		// `force: true` already ignores missing paths, so no separate
		// existence check is needed (and none can race with the removal).
		fs.rmSync(dir, { recursive: true, force: true });
	}
}
|
||
/**
 * Creates every directory in `dirs`, including any missing parents.
 *
 * @param dirs Paths of the directories to create.
 */
export function mkDirs(dirs: string[]) {
	dirs.forEach((target) => {
		fs.mkdirSync(target, { recursive: true });
	});
}
|
||
/**
 * Recursively collects the files below `dir`.
 *
 * Each entry's path (`dir` joined with the entry name) is tested against the
 * patterns:
 * - an entry, file or directory, matching ANY `excludes` pattern is skipped,
 *   and an excluded directory is not descended into;
 * - a file is kept only if it matches EVERY `includes` pattern; an empty
 *   `includes` list keeps all files.
 *
 * @param dir Directory to search.
 * @param includes Patterns a file path must all match to be returned.
 * @param excludes Patterns that remove a path from the search.
 * @returns The paths of the matching files.
 * @throws Error when `dir` does not exist.
 */
export function findFilesInDir(
	dir: string,
	includes: RegExp[],
	excludes: RegExp[],
): string[] {
	if (!fs.existsSync(dir)) throw new Error("no dir " + dir);
	const files: string[] = [];
	// `withFileTypes` reports the entry type directly, so no extra stat call
	// per entry is needed. Symbolic links are not reported as directories.
	for (const entry of fs.readdirSync(dir, { withFileTypes: true })) {
		const name = path.join(dir, entry.name);
		if (excludes.some((exclude) => name.match(exclude) !== null)) continue;
		if (entry.isDirectory()) {
			for (const nested of findFilesInDir(name, includes, excludes)) {
				files.push(nested);
			}
		} else if (includes.every((include) => name.match(include) !== null)) {
			files.push(name);
		}
	}
	return files;
}
|
||
/**
 * Writes `data` to `filename` as UTF-8, replacing any existing content.
 *
 * @param filename Target path. A relative path is resolved against the
 *   current working directory; an absolute path is used as given.
 * @param data Text to write.
 */
export function writeFile(filename: string, data: string) {
	// `path.resolve` leaves absolute paths alone, whereas joining onto the
	// working directory would nest an absolute path underneath it.
	const filepath = path.resolve(filename);
	fs.writeFileSync(filepath, data, { encoding: "utf-8" });
}
|
||
/**
 * Reads `filename` and returns its content decoded as UTF-8.
 *
 * @param filename Path of the file; relative paths are resolved against the
 *   current working directory.
 * @returns The file content.
 * @throws Error with the message `Failed to open file: <filename>`; the
 *   underlying file-system error is available on its `cause` property.
 */
export function readFile(filename: string) {
	try {
		const filepath = path.resolve(filename);
		return fs.readFileSync(filepath, { encoding: "utf-8" });
	} catch (error) {
		// Keep the original failure reachable instead of discarding it.
		throw Object.assign(new Error(`Failed to open file: ${filename}`), {
			cause: error,
		});
	}
}
|
||
/**
 * Re-serialises a JSON document with one tab of indentation per nesting level.
 *
 * @param json JSON text to format.
 * @returns The formatted JSON text.
 * @throws SyntaxError when `json` is not valid JSON.
 */
export function jsonFmt(json: string) {
	const parsed: unknown = JSON.parse(json);
	const formatted = JSON.stringify(parsed, null, "\t");
	return formatted;
}
|
||
/**
 * Re-serialises a JSON document without any insignificant whitespace.
 *
 * @param json JSON text to minify.
 * @returns The compact JSON text.
 * @throws SyntaxError when `json` is not valid JSON.
 */
export function jsonMinify(json: string) {
	const parsed: unknown = JSON.parse(json);
	const compact = JSON.stringify(parsed);
	return compact;
}
|
||
/**
 * Verifies that each named program can be located with `which`.
 *
 * Programs are checked in order and the first one that cannot be found
 * aborts the check.
 *
 * @param programs Names of the executables to look up.
 * @throws Error naming the first program whose `which` lookup failed.
 */
export function checkInstalled(programs: string[]) {
	programs.forEach((name) => {
		try {
			execSync(`which "${name}"`);
		} catch (_lookupFailure) {
			throw new Error(`Exit: "${name}" is not installed.`);
		}
	});
}
|
||
/**
 * Runs `command` synchronously in a shell and discards its output.
 *
 * @param command Shell command line to execute.
 * @throws Error with the message `Failed to execute command: <command>` when
 *   the command fails; the underlying error (exit status, captured output) is
 *   available on its `cause` property.
 */
export function executeCommand(command: string) {
	try {
		execSync(command);
	} catch (err) {
		// Keep the original failure reachable instead of discarding it.
		throw Object.assign(new Error(`Failed to execute command: ${command}`), {
			cause: err,
		});
	}
}
|
||
/**
 * Runs `command` synchronously in a shell and returns what it wrote to
 * standard output.
 *
 * @param command Shell command line to execute.
 * @returns The command's standard output converted to a string.
 * @throws Error with the message `Failed to execute command: <command>` when
 *   the command fails; the underlying error (exit status, captured output) is
 *   available on its `cause` property.
 */
export function executeCommandReturn(command: string) {
	try {
		return execSync(command).toString();
	} catch (err) {
		// Keep the original failure reachable instead of discarding it.
		throw Object.assign(new Error(`Failed to execute command: ${command}`), {
			cause: err,
		});
	}
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
{ | ||
"name": "fimfic-stats", | ||
"version": "0.1.0", | ||
"description": "Scrapes FIMFiction story data stats from the API and stats page.", | ||
"private": true, | ||
"module": "fimfic-stats.ts", | ||
"type": "module", | ||
"scripts": { | ||
"pfmt": "prettier **/*.{ts,json} --use-tabs --write" | ||
}, | ||
"bin": {}, | ||
"repository": { | ||
"type": "git", | ||
"url": "git+https://github.com/SilkRose/Pony.git" | ||
}, | ||
"contributors": [ | ||
"Silk Rose", | ||
"Meadowsys" | ||
], | ||
"license": "CC0-1.0", | ||
"bugs": { | ||
"url": "https://github.com/SilkRose/Pony/issues" | ||
}, | ||
"homepage": "https://github.com/SilkRose/Pony#readme", | ||
"devDependencies": { | ||
"@total-typescript/ts-reset": "^0.5.1", | ||
"bun-types": "latest" | ||
}, | ||
"peerDependencies": { | ||
"typescript": "^5.2.2" | ||
}, | ||
"dependencies": { | ||
"node-html-markdown": "^1.3.0" | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
{ | ||
"compilerOptions": { | ||
"lib": ["ESNext"], | ||
"module": "esnext", | ||
"target": "esnext", | ||
"moduleResolution": "bundler", | ||
"moduleDetection": "force", | ||
"composite": true, | ||
"noEmit": true, | ||
"strict": true, | ||
"alwaysStrict": true, | ||
"strictNullChecks": true, | ||
"strictBindCallApply": true, | ||
"strictFunctionTypes": true, | ||
"allowImportingTsExtensions": true, | ||
"strictPropertyInitialization": true, | ||
"esModuleInterop": true, | ||
"allowSyntheticDefaultImports": true, | ||
"forceConsistentCasingInFileNames": true, | ||
"noImplicitAny": true, | ||
"noImplicitThis": true, | ||
"noImplicitReturns": true, | ||
"noImplicitOverride": true, | ||
"allowUnusedLabels": false, | ||
"allowUnreachableCode": false, | ||
"exactOptionalPropertyTypes": true, | ||
"useUnknownInCatchVariables": true, | ||
"noPropertyAccessFromIndexSignature": true, | ||
"isolatedModules": true, | ||
"resolveJsonModule": true, | ||
"noFallthroughCasesInSwitch": true, | ||
"types": ["bun-types"] | ||
} | ||
} |