Skip to content

Commit

Permalink
Fix and add cron
Browse files Browse the repository at this point in the history
  • Loading branch information
jmduke committed Sep 22, 2024
1 parent dd122a3 commit 933ad99
Show file tree
Hide file tree
Showing 5 changed files with 61 additions and 7 deletions.
46 changes: 46 additions & 0 deletions app/api/crons/refresh/route.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import fetch from "@/lib/data";
import { db } from "@/lib/db/connection";
import { reify } from "@/lib/db/domains";
import { sql } from "kysely";
import pino from "pino";

// Module-level pino logger for this route, created with the name "cron-refresh".
const logger = pino({ name: "cron-refresh" });

/** Row shape produced by RAW_QUERY: only the `domain` column is selected. */
type SampledDomainRow = {
  domain: string;
};

// Raw SQL that pulls a sample of `domain` values from the `tranco` table.
// NOTE(review): `TABLESAMPLE system (0.01)` looks like PostgreSQL block-level
// sampling, which returns an approximate (possibly empty) fraction of the
// table rather than a fixed row count; confirm against the database in use.
const RAW_QUERY = sql<SampledDomainRow>`
select domain from tranco TABLESAMPLE system (0.01)
`;

/**
 * Executes RAW_QUERY against the shared `db` connection and returns the
 * `domain` value of every row that comes back, in result order.
 */
const getRandomDomains = async () => {
  const { rows } = await RAW_QUERY.execute(db);
  return rows.map(({ domain }) => domain);
};

// Upper bound on how many of the sampled domains a single invocation of the
// cron handler refreshes (the sampled list is sliced to this length).
const MAXIMUM_DOMAINS = 10;

// This handler reads nothing from the incoming request. On Next.js versions
// where GET route handlers are statically cached by default, that would let
// the framework evaluate it once and serve the cached response instead of
// running a refresh on every cron hit. Forcing dynamic rendering is harmless
// on versions that are already dynamic.
// NOTE(review): confirm the Next.js version this project targets.
export const dynamic = "force-dynamic";

/**
 * Cron entry point: samples domains, then refreshes up to MAXIMUM_DOMAINS of
 * them concurrently by fetching each one and persisting the result via `reify`.
 *
 * A failure on one domain is logged and recorded, but does not abort the
 * other refreshes or fail the whole request.
 *
 * @param request - Incoming request; not read by this handler.
 * @param context - Route context; not read by this handler. Kept so the
 *   signature is unchanged for existing callers.
 * @returns JSON body with `domains` (every domain attempted) and `failed`
 *   (the subset whose refresh threw).
 */
export async function GET(
  request: Request,
  context: {
    params: {
      domain: string;
    };
  },
) {
  const domains = await getRandomDomains();
  const selectedDomains = domains.slice(0, MAXIMUM_DOMAINS);

  // Each task resolves to the domain name on failure and null on success, so
  // Promise.all never rejects and every refresh runs to completion before the
  // response is sent.
  const outcomes = await Promise.all(
    selectedDomains.map(async (domain): Promise<string | null> => {
      logger.info({
        message: "refresh.started",
        domain: domain,
      });
      try {
        // `fetch` here is whatever is in module scope (this file imports a
        // project-specific default export under that name).
        const rawResponse = await fetch(domain);
        await reify(domain, rawResponse);
        return null;
      } catch (error: unknown) {
        logger.error({
          message: "refresh.failed",
          domain: domain,
          err: error,
        });
        return domain;
      }
    }),
  );

  const failed = outcomes.filter(
    (outcome): outcome is string => outcome !== null,
  );

  return Response.json({
    domains: selectedDomains,
    failed: failed,
  });
}
1 change: 0 additions & 1 deletion app/api/domains/[domain]/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ export async function GET(
) {
const rawResponse = await fetch(context.params.domain);
await reify(context.params.domain, rawResponse);
console.log(rawResponse.detected_technologies);

return Response.json({
domain: context.params.domain,
Expand Down
7 changes: 4 additions & 3 deletions app/technology/[identifier]/page.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -80,14 +80,15 @@ export default async function TechnologyPage({
const trancoCount = process.env.DISABLE_DATABASE
? 0
: await db
.selectFrom("tranco")
.selectFrom("affiliations")
.innerJoin(
"detected_technologies",
"tranco.domain",
"affiliations.domain",
"detected_technologies.domain"
)
.where("detected_technologies.technology", "=", params.identifier)
.select(db.fn.count("tranco.domain").as("count"))
.where("affiliations.identifier", "=", "tranco")
.select(db.fn.count("affiliations.domain").as("count"))
.executeTakeFirst()
.then((result) => Number(result?.count || 0));

Expand Down
6 changes: 3 additions & 3 deletions lib/parsers/html.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ const GENERIC_SOCIAL_MEDIA_PROVIDER = (html: string) => {
const username = match[match.length - 1];
return [
{
identifier: potentialMatch?.identifier,
identifier: potentialMatch?.identifier as string,
metadata: {
username: username.split("?")[0],
},
Expand Down Expand Up @@ -119,7 +119,7 @@ const JSONLD_RULE = (html: string) => {
);
if (service) {
return {
identifier: service.identifier.split("?")[0],
identifier: service.identifier.split("?")[0] as string,
metadata: {
username: url.split("/").pop() || "",
},
Expand Down Expand Up @@ -199,7 +199,7 @@ const SUBDOMAIN_RULE = (html: string, domain: string) => {

const RULES: ((html: string, domain: string) => DetectedTechnology[])[] = [
...Object.values(REGISTRY).map((service) => {
return (html: string, domain: string) => {
return (html: string) => {
const potentialMatches = service.substrings?.filter((substring) =>
html.includes(substring),
);
Expand Down
8 changes: 8 additions & 0 deletions vercel.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"crons": [
{
"path": "/api/crons/refresh",
"schedule": "* * * * *"
}
]
}

0 comments on commit 933ad99

Please sign in to comment.