diff --git a/.env.example b/.env.example
index 3826b89..6d994cd 100644
--- a/.env.example
+++ b/.env.example
@@ -11,3 +11,5 @@ MYSQL_ROOT_PASSWORD=psaggregator
 LEGAL_URL=
 PUBLIC_KOFI_USERNAME=zaanposni
+
+OPENAI_API_KEY=
diff --git a/docker-compose-pma.yml b/docker-compose-pma.yml
new file mode 100644
index 0000000..6d949fe
--- /dev/null
+++ b/docker-compose-pma.yml
@@ -0,0 +1,16 @@
+services:
+    phpmyadmin:
+        container_name: phpmyadmin
+        image: phpmyadmin
+        restart: unless-stopped
+        ports:
+            - 0.0.0.0:5651:80
+        environment:
+            - PMA_HOST=db
+            - PMA_PORT=3306
+            - PMA_USER=root
+            - PMA_PASSWORD=${MYSQL_ROOT_PASSWORD}
+        depends_on:
+            - db
+        networks:
+            - mysql
\ No newline at end of file
diff --git a/docker-compose-test.yml b/docker-compose-test.yml
index e0e7bf8..05a33a0 100644
--- a/docker-compose-test.yml
+++ b/docker-compose-test.yml
@@ -46,6 +46,7 @@ services:
             - REDDIT_CLIENT_SECRET=${REDDIT_CLIENT_SECRET}
             - TWITCH_CLIENT_ID=${TWITCH_CLIENT_ID}
             - TWITCH_CLIENT_SECRET=${TWITCH_CLIENT_SECRET}
+            - OPENAI_API_KEY=${OPENAI_API_KEY}
             - SQLALCHEMY_SILENCE_UBER_WARNING=1
             - YT_SERVER_BASE_URL=http://youtube-api
         volumes:
diff --git a/docker-compose.yml b/docker-compose.yml
index 71c610b..b22fabc 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -40,6 +40,7 @@ services:
             - REDDIT_CLIENT_SECRET=${REDDIT_CLIENT_SECRET}
             - TWITCH_CLIENT_ID=${TWITCH_CLIENT_ID}
             - TWITCH_CLIENT_SECRET=${TWITCH_CLIENT_SECRET}
+            - OPENAI_API_KEY=${OPENAI_API_KEY}
             - SQLALCHEMY_SILENCE_UBER_WARNING=1
             - YT_SERVER_BASE_URL=http://youtube-api
         volumes:
diff --git a/readme.md b/readme.md
index dffe0ac..2ccd87e 100644
--- a/readme.md
+++ b/readme.md
@@ -12,6 +12,7 @@ PietSmiet Aggregator is a selfhostable web application that aggregates all the v
 First, [download][docker_download_url] and install **Docker**.
 
 Create a `.env` file in the root directory of the project. You can use the `.env.example` file as a template.
+If you do not want to use certain features (for example the OpenAI Streamingplan Analysis), you can leave the corresponding API keys empty.
 
 A `docker-compose.yml` file is provided to run the application. It will start the application and a MySQL database.
@@ -59,6 +60,7 @@ git clone https://github.com/zaanposni/psaggregator.git
 ```
 
 Then, create a `.env` file in the root directory of the project. You can use the `.env.example` file as a template.
+If you do not want to use certain features (for example the OpenAI Streamingplan Analysis), you can leave the corresponding API keys empty.
 
 Start the MySQL database and YouTubeOperationalAPI:
diff --git a/src/dataimport/.env.example b/src/dataimport/.env.example
index 594a9d8..be51dc4 100644
--- a/src/dataimport/.env.example
+++ b/src/dataimport/.env.example
@@ -5,3 +5,5 @@ REDDIT_CLIENT_SECRET=
 TWITCH_CLIENT_ID=
 TWITCH_CLIENT_SECRET=
+
+OPENAI_API_KEY=
diff --git a/src/dataimport/.gitignore b/src/dataimport/.gitignore
index e6bab70..55b1e95 100644
--- a/src/dataimport/.gitignore
+++ b/src/dataimport/.gitignore
@@ -4,3 +4,4 @@ instagram.py
 openai_test.py
 .env
 videos.sql
+__pycache__/
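Note on the optional key: the dataimport scripts fail fast instead of degrading silently, so leaving `OPENAI_API_KEY` empty only disables the analyzer job while every other cron job keeps running. A minimal sketch of the guard pattern used by the analyzer script below (the log wording here is illustrative, not the exact line from the script):

```python
import os
import sys

from rich.console import Console

console = Console()

# Each feature-specific script checks its own key and exits early, so an
# empty OPENAI_API_KEY in .env only disables this one import job.
if not os.getenv("OPENAI_API_KEY"):
    console.log("OPENAI_API_KEY not set, skipping analysis", style="bold red")
    sys.exit(1)
```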
diff --git a/src/dataimport/hello-cron b/src/dataimport/hello-cron
index 95bbbc1..bd12615 100644
--- a/src/dataimport/hello-cron
+++ b/src/dataimport/hello-cron
@@ -5,6 +5,7 @@
 5 * * * * . /root/project_env.sh; /usr/local/bin/python /app/youtube.py >> /var/log/cron.log 2>&1
 */15 * * * * . /root/project_env.sh; /usr/local/bin/python /app/reddit.py >> /var/log/cron.log 2>&1
 */1 * * * * . /root/project_env.sh; /usr/local/bin/python /app/twitch.py >> /var/log/cron.log 2>&1
+1 1 * * * . /root/project_env.sh; /usr/local/bin/python /app/informationopenaianalyze.py >> /var/log/cron.log 2>&1
 2 * * * * . /root/project_env.sh; /usr/local/bin/python /app/pietsmietdeuploadplan.py >> /var/log/cron.log 2>&1
 5 * * * * . /root/project_env.sh; /usr/local/bin/python /app/pietsmietdeuploadplan.py >> /var/log/cron.log 2>&1
 30 * * * * . /root/project_env.sh; /usr/local/bin/python /app/pietsmietdeuploadplan.py >> /var/log/cron.log 2>&1
diff --git a/src/dataimport/informationopenaianalyze.py b/src/dataimport/informationopenaianalyze.py
new file mode 100644
index 0000000..4aa3cfc
--- /dev/null
+++ b/src/dataimport/informationopenaianalyze.py
@@ -0,0 +1,153 @@
+import json
+import os
+import asyncio
+import base64
+from uuid import uuid4
+from datetime import datetime, timedelta
+
+from rich.console import Console
+from databases import Database
+from openai import OpenAI
+from dateutil.parser import parse
+
+
+console = Console()
+
+# Fail fast when the key is missing; cron captures the log line.
+if not os.getenv("OPENAI_API_KEY"):
+    console.log("OPENAI_API_KEY not set", style="bold red")
+    exit(1)
+
+client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+
+INSERT_STATEMENT = """
+    INSERT INTO ScheduledContentPiece (id, remoteId, title, description, additionalInfo, startDate, imageUri, href, secondaryHref, duration, importedAt, importedFrom, type)
+    VALUES (:id, NULL, :title, :description, :additionalInfo, :startDate, NULL, 'https://twitch.tv/pietsmiet', NULL, NULL, now(), 'OpenAI', 'TwitchStream')"""
+
+
+def openai_request(file_data) -> dict:
+    console.log("Sending request to OpenAI...", style="bold green")
+    response = client.chat.completions.create(
+        model="gpt-4-vision-preview",
+        messages=[
+            {
+                "role": "system",
+                "content": [
+                    {
+                        "type": "text",
+                        "text": " ".join(
+                            """
+                            You are an assistant that generates JSON. You always return just the JSON with no additional description or context.
+                            The following image might be a streaming plan of a content creator.
+                            The JSON contains a list of streams. Each stream MUST have a start, title, game, additional_information. Use ISO 8601 format.
+                            If the image is not a streaming plan, return an empty list of streams.
+                            Keep in mind that all names, texts and times are in German. There can be multiple streams per day.
+                            """.split()
+                        ),
+                    }
+                ],
+            },
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "image_url",
+                        "image_url": {
+                            "url": f"data:image/jpeg;base64,{file_data}",
+                            "detail": "high",
+                        },
+                    },
+                ],
+            },
+        ],
+        max_tokens=2000,
+    )
+
+    console.log("Response:", style="bold green")
+    console.log(response, style="bold green")
+
+    # The model sometimes wraps its answer in a markdown code fence; strip it
+    # before parsing.
+    sanitized_string = (
+        response.choices[0]
+        .message.content.replace("\n", "")
+        .replace("```json", "")
+        .replace("```", "")
+    )
+    data = json.loads(sanitized_string)
+    console.log("Data:", style="bold green")
+    console.log(data, style="bold green")
+    return data
+
+
+async def openai():
+    console.log("Connecting to database...", style="bold green")
+    db = Database(url=os.getenv("DATABASE_URL"))
+    await db.connect()
+
+    # Only consider fresh, not-yet-analyzed YouTube images.
+    last_day = datetime.now() - timedelta(days=1)
+    query = f"""
+        SELECT * FROM Information
+        WHERE analyzedAt IS NULL AND
+            importedAt > '{last_day.strftime('%Y-%m-%d %H:%M:%S')}' AND
+            imageUri IS NOT NULL AND
+            importedFrom = 'YouTube'"""
+    console.log("Fetching data...", style="bold green")
+
+    rows = await db.fetch_all(query)
+    console.log(f"Fetched {len(rows)} rows", style="bold green")
+
+    for row in rows:
+        try:
+            console.log(f"Analyzing {row.id}", style="bold green")
+            file_path = f"/app{row.imageUri}"
+            with open(file_path, "rb") as image_file:
+                file_data = base64.b64encode(image_file.read()).decode("utf-8")
+
+            open_ai_res = openai_request(file_data)
+
+            console.log(
+                f"Analyzed {row.id} with {len(open_ai_res['streams'])} streams",
+                style="bold green",
+            )
+
+            for stream in open_ai_res["streams"]:
+                if any(x not in stream for x in ["start", "title", "game"]):
+                    console.log(
+                        f"Missing required fields in stream {stream}",
+                        style="bold red",
+                    )
+                    continue
+
+                # Streaming plans rarely state a year; assume the current one
+                # when the parsed date falls in the past.
+                start_date = parse(stream["start"])
+                if start_date.year < datetime.now().year:
+                    start_date = start_date.replace(year=datetime.now().year)
+
+                query = "SELECT * FROM ScheduledContentPiece WHERE type = :type AND startDate = :startDate"
+                values = {
+                    "type": "TwitchStream",
+                    "startDate": start_date.strftime("%Y-%m-%d %H:%M:%S"),
+                }
+                existing_stream = await db.fetch_one(query=query, values=values)
+                if existing_stream:
+                    console.log(
+                        f"Stream {stream['title']} already exists, skipping",
+                        style="bold red",
+                    )
+                    continue
+
+                console.log(f"Inserting stream {stream['title']}", style="bold green")
+                values = {
+                    "id": str(uuid4()),
+                    "title": stream["title"],
+                    "description": stream["game"],
+                    "additionalInfo": stream.get("additional_information"),
+                    "startDate": start_date.strftime("%Y-%m-%d %H:%M:%S"),
+                }
+                await db.execute(query=INSERT_STATEMENT, values=values)
+        except Exception as e:
+            # One unreadable image or malformed response must not abort the
+            # whole batch; log it and move on.
+            console.log(f"Failed to analyze {row.id}: {e}", style="bold red")
+        finally:
+            console.log(f"Setting analyzedAt for {row.id}", style="bold green")
+            query = "UPDATE Information SET analyzedAt = NOW() WHERE id = :id"
+            await db.execute(query=query, values={"id": row.id})
+
+    await db.disconnect()
+    console.log("Done")
+
+
+asyncio.run(openai())
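For reference, the parser above assumes the model replies with a single JSON object shaped like the following (the values are invented examples; only the structure matters, and `additional_information` may be absent because it is read with `.get()`):

```python
# Invented example of a model reply that informationopenaianalyze.py accepts.
example_response = {
    "streams": [
        {
            "start": "2024-01-26T18:00:00",  # ISO 8601; the year may be wrong
            "title": "Abendstream",
            "game": "Lethal Company",
            "additional_information": "mit dem ganzen Team",
        }
    ]
}
```

Streams missing any of `start`, `title` or `game` are logged and skipped; an empty `"streams"` list means the image was not a streaming plan.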
+ """.split() + ), + } + ], + }, + { + "role": "user", + "content": [ + { + "type": "image_url", + "image_url": { + "url": f"data:image/jpeg;base64,{file_data}", + "detail": "high", + }, + }, + ], + }, + ], + max_tokens=2000, + ) + + console.log("Response:", style="bold green") + console.log(response, style="bold green") + + sanitized_string = ( + response.choices[0] + .message.content.replace("\n", "") + .replace("```json", "") + .replace("```", "") + ) + data = json.loads(sanitized_string) + console.log("Data:", style="bold green") + console.log(data, style="bold green") + return data + + +async def openai(): + console.log("Connecting to database...", style="bold green") + db = Database(url=os.getenv("DATABASE_URL")) + await db.connect() + + last_day = datetime.now() - timedelta(days=1) + query = f""" + SELECT * FROM Information + WHERE analyzedAt IS NULL AND + importedAt > '{last_day.strftime('%Y-%m-%d %H:%M:%S')}' AND + imageUri IS NOT NULL AND + importedFrom = 'YouTube'""" + console.log("Fetching data...", style="bold green") + + rows = await db.fetch_all(query) + console.log(f"Fetched {len(rows)} rows", style="bold green") + + for row in rows: + try: + console.log(f"Analyzing {row.id}", style="bold green") + file_path = f"/app{row.imageUri}" + with open(file_path, "rb") as image_file: + file_data = base64.b64encode(image_file.read()).decode("utf-8") + + open_ai_res = openai_request(file_data) + + console.log( + f"Analyzed {row.id} with {len(open_ai_res['streams'])} streams", + style="bold green", + ) + + for stream in open_ai_res["streams"]: + if any(x not in stream for x in ["start", "title", "game"]): + console.log( + f"Missing required fields in stream {stream}", + style="bold red", + ) + continue + + start_date = parse(stream["start"]) + if start_date.year < datetime.now().year: + start_date = start_date.replace(year=datetime.now().year) + + query = "SELECT * FROM ScheduledContentPiece WHERE type = :type AND startDate = :startDate" + values = { + "type": "TwitchStream", + "startDate": start_date.strftime("%Y-%m-%d %H:%M:%S"), + } + existing_stream = await db.fetch_one(query=query, values=values) + if existing_stream: + console.log( + f"Stream {stream['title']} already exists, skipping", + style="bold red", + ) + continue + + console.log(f"Inserting stream {stream['title']}", style="bold green") + values = { + "id": str(uuid4()), + "title": stream["title"], + "description": stream["game"], + "additionalInfo": stream.get("additional_information"), + "startDate": start_date.strftime("%Y-%m-%d %H:%M:%S"), + } + await db.execute(query=INSERT_STATEMENT, values=values) + finally: + console.log(f"Setting analyzedAt for {row.id}", style="bold green") + query = "UPDATE Information SET analyzedAt = NOW() WHERE id = :id" + await db.execute(query=query, values={"id": row.id}) + + await db.disconnect() + console.log("Done") + + +asyncio.run(openai()) diff --git a/src/dataimport/pietsmietdeuploadplan.py b/src/dataimport/pietsmietdeuploadplan.py index 7a7c55f..8126520 100644 --- a/src/dataimport/pietsmietdeuploadplan.py +++ b/src/dataimport/pietsmietdeuploadplan.py @@ -258,6 +258,7 @@ async def stuff() -> asyncio.coroutine: ('{uuid4()}', NULL , '{information}', NULL , 'https://www.pietsmiet.de/uploadplan', '{uploadplan['data'][0]['date']}', now() , 'PietSmietDE')""" await db.execute(query) + await db.disconnect() console.log("Done!", style="bold green") diff --git a/src/dataimport/pietsmietdevideoimporter.py b/src/dataimport/pietsmietdevideoimporter.py index 2fce865..b3e555b 100644 --- 
diff --git a/src/dataimport/reddit.py b/src/dataimport/reddit.py
index fb13590..3b59519 100644
--- a/src/dataimport/reddit.py
+++ b/src/dataimport/reddit.py
@@ -79,6 +79,7 @@ async def stuff() -> asyncio.coroutine:
 
         await db.execute(query)
 
+    await db.disconnect()
     console.log("Done!", style="bold green")
diff --git a/src/dataimport/requirements.txt b/src/dataimport/requirements.txt
index 3ea5ee4..bdbb56c 100644
--- a/src/dataimport/requirements.txt
+++ b/src/dataimport/requirements.txt
@@ -8,3 +8,4 @@ twitchAPI==4.1.0
 praw==7.7.1
 rich==12.4.4
 requests
+openai==1.7.2
diff --git a/src/dataimport/youtube.py b/src/dataimport/youtube.py
index fd9ee44..e6c6f8a 100644
--- a/src/dataimport/youtube.py
+++ b/src/dataimport/youtube.py
@@ -103,7 +103,8 @@ async def youtube():
         console.log(f"Insert {yt['id']} into database", style="bold green")
         await db.execute(query=query)
 
-    print("Done")
+    await db.disconnect()
+    console.log("Done")
 
 
 asyncio.run(youtube())
diff --git a/src/psaggregator/src/config/config.ts b/src/psaggregator/src/config/config.ts
index 806e31b..24a4b5e 100644
--- a/src/psaggregator/src/config/config.ts
+++ b/src/psaggregator/src/config/config.ts
@@ -4,3 +4,6 @@ export const MAIL_TO_URL = "mailto:psaggregator@zaanposni.com";
 export const LEGAL_URL = env.PUBLIC_LEGAL_URL;
 export const MICROANALYTICS_ID = env.PUBLIC_MICROANALYTICS_ID;
 export const KOFI_USERNAME = env.PUBLIC_KOFI_USERNAME;
+
+export const GITHUB_URL = "https://github.com/zaanposni/psaggregator";
+export const GITHUB_AUTHOR_URL = "https://github.com/zaanposni";
diff --git a/src/psaggregator/src/config/migrations/20240122170826_add_analyzed_field/migration.sql b/src/psaggregator/src/config/migrations/20240122170826_add_analyzed_field/migration.sql
new file mode 100644
index 0000000..a08276c
--- /dev/null
+++ b/src/psaggregator/src/config/migrations/20240122170826_add_analyzed_field/migration.sql
@@ -0,0 +1,2 @@
+-- AlterTable
+ALTER TABLE `Information` ADD COLUMN `analyzedAt` DATETIME(3) NULL DEFAULT CURRENT_TIMESTAMP(3);
diff --git a/src/psaggregator/src/config/migrations/20240122174253_add_open_ai_import_type/migration.sql b/src/psaggregator/src/config/migrations/20240122174253_add_open_ai_import_type/migration.sql
new file mode 100644
index 0000000..6ed74db
--- /dev/null
+++ b/src/psaggregator/src/config/migrations/20240122174253_add_open_ai_import_type/migration.sql
@@ -0,0 +1,8 @@
+-- AlterTable
+ALTER TABLE `ContentPiece` MODIFY `importedFrom` ENUM('Unknown', 'PietSmietDE', 'Instagram', 'Twitter', 'Threads', 'Reddit', 'YouTube', 'OpenAI', 'Custom') NOT NULL;
+
+-- AlterTable
+ALTER TABLE `Information` MODIFY `importedFrom` ENUM('Unknown', 'PietSmietDE', 'Instagram', 'Twitter', 'Threads', 'Reddit', 'YouTube', 'OpenAI', 'Custom') NOT NULL;
+
+-- AlterTable
+ALTER TABLE `ScheduledContentPiece` MODIFY `importedFrom` ENUM('Unknown', 'PietSmietDE', 'Instagram', 'Twitter', 'Threads', 'Reddit', 'YouTube', 'OpenAI', 'Custom') NOT NULL;
diff --git a/src/psaggregator/src/config/schema.prisma b/src/psaggregator/src/config/schema.prisma
index 09bd8d3..014510a 100644
--- a/src/psaggregator/src/config/schema.prisma
+++ b/src/psaggregator/src/config/schema.prisma
@@ -21,6 +21,7 @@ enum ImportType {
     Threads
     Reddit
     YouTube
+    OpenAI
     Custom
 }
 
@@ -63,6 +64,7 @@ model Information {
     imageUri     String?    @db.VarChar(1024)
     href         String?    @db.VarChar(1024)
     date         DateTime?
+    analyzedAt   DateTime?  @default(now())
     importedAt   DateTime
     importedFrom ImportType
 }
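One interaction worth keeping in mind (an observation, not something the patch documents): the new column defaults to `CURRENT_TIMESTAMP(3)`, while the analyzer selects rows `WHERE analyzedAt IS NULL` — so an `Information` row is only ever analyzed if it was inserted with an explicit `NULL`. A hypothetical insert illustrating this; the column names come from the schema above, but other columns of the model are omitted for brevity:

```python
# Hypothetical: queue an Information row for the OpenAI analyzer.
# Passing NULL explicitly matters - relying on the column default would
# stamp analyzedAt at insert time and the row would never be picked up.
QUEUE_QUERY = """
    INSERT INTO Information (id, imageUri, analyzedAt, importedAt, importedFrom)
    VALUES (:id, :imageUri, NULL, now(), 'YouTube')"""
```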
diff --git a/src/psaggregator/src/lib/components/BigHeader.svelte b/src/psaggregator/src/lib/components/BigHeader.svelte
index 22387ac..08db89a 100644
--- a/src/psaggregator/src/lib/components/BigHeader.svelte
+++ b/src/psaggregator/src/lib/components/BigHeader.svelte
@@ -1,7 +1,7 @@
@@ -16,7 +16,7 @@
             News
             API
             Motivation
-
+
                 GitHub
         {/if}
diff --git a/src/psaggregator/src/lib/components/Footer.svelte b/src/psaggregator/src/lib/components/Footer.svelte
index 7402302..ffb5124 100644
--- a/src/psaggregator/src/lib/components/Footer.svelte
+++ b/src/psaggregator/src/lib/components/Footer.svelte
@@ -1,6 +1,6 @@
diff --git a/src/psaggregator/src/lib/components/Sparkle.svelte b/src/psaggregator/src/lib/components/Sparkle.svelte
new file mode 100644
--- /dev/null
+++ b/src/psaggregator/src/lib/components/Sparkle.svelte
+
+
+
+
+
+
+
diff --git a/src/psaggregator/src/lib/components/UploadPlanEntry.svelte b/src/psaggregator/src/lib/components/UploadPlanEntry.svelte
index 5cb8760..ef0763a 100644
--- a/src/psaggregator/src/lib/components/UploadPlanEntry.svelte
+++ b/src/psaggregator/src/lib/components/UploadPlanEntry.svelte
@@ -2,13 +2,14 @@
     import type { ScheduledContentPiece } from "@prisma/client";
     import { Video, VideoPlayer } from "carbon-icons-svelte";
     import moment from "moment";
+    import Sparkle from "./Sparkle.svelte";
 
     export let entry: ScheduledContentPiece;
 
 {#if entry.href}
-
+
+
         {#if entry.type === "TwitchStream"}
 
             {#if entry.startDate}
                 {@const date = moment(entry.startDate)}
-
+
                     {#if !moment().isSame(date, "day")}
                         {date.format("DD. MMM,")}
                     {/if}
@@ -25,10 +26,16 @@
             {/if}
             {entry.title}
+            {#if entry.importedFrom === "OpenAI"}
+
+
+
+            {/if}
 {:else}
-
-
+
+
         {#if entry.type === "TwitchStream"}
 
             {#if entry.startDate}
                 {@const date = moment(entry.startDate)}
-
+
                     {#if !moment().isSame(date, "day")}
                         {date.format("DD. MMM,")}
                     {/if}
@@ -45,5 +52,11 @@
             {/if}
             {entry.title}
+            {#if entry.importedFrom === "OpenAI"}
+
+
+
+            {/if}
 {/if}
diff --git a/src/psaggregator/src/routes/+page.server.ts b/src/psaggregator/src/routes/+page.server.ts
index 7653701..fdc50d8 100644
--- a/src/psaggregator/src/routes/+page.server.ts
+++ b/src/psaggregator/src/routes/+page.server.ts
@@ -10,6 +10,9 @@ export async function load() {
             type: {
                 equals: "PSVideo"
             },
+            importedFrom: {
+                equals: "PietSmietDE"
+            },
             startDate: {
                 lt: upperBound,
                 gt: lowerBound
diff --git a/src/psaggregator/src/routes/api/+page.svelte b/src/psaggregator/src/routes/api/+page.svelte
index 4398651..9024cc4 100644
--- a/src/psaggregator/src/routes/api/+page.svelte
+++ b/src/psaggregator/src/routes/api/+page.svelte
@@ -4,8 +4,11 @@
     import bash from "highlight.js/lib/languages/bash";
     import { storeHighlightJs } from "@skeletonlabs/skeleton";
     import moment from "moment";
+    import { GITHUB_URL, MAIL_TO_URL } from "../../config/config";
+    import Sparkle from "$lib/components/Sparkle.svelte";
 
     const curlUploadPlan = `curl -X GET ${location.protocol}//${location.host}/api/uploadplan?date=${moment().format("YYYY-MM-DD")}`;
+    const curlScheduledContentPieces = `curl -X GET ${location.protocol}//${location.host}/api/scheduledContentPieces?date=${moment().format("YYYY-MM-DD")}&skip=0`;
     const curlVideos = `curl -X GET ${location.protocol}//${location.host}/api/videos?skip=0`;
     const curlTwitch = `curl -X GET ${location.protocol}//${location.host}/api/twitch`;
     const curlThumbnails = `curl -X GET ${location.protocol}//${location.host}/api/thumbnails?skip=0`;
@@ -52,14 +55,32 @@
         Der Uploadplan wird direkt von der pietsmiet.de importiert.
         Nutze den ?date Parameter um alte Uploadpläne anzusehen.
+    GET /scheduledContentPieces
+
+
+
+        scheduledContentPieces sind ähnlich wie der Uploadplan alle potentiell stattfindenden Ereignisse.
+        Der Unterschied ist, dass hier auch Events angezeigt werden, die nicht im Uploadplan stehen.
+
+
+        Einzelne Daten in dieser API werden über die OpenAI Vision AI analysiert.
+        Dadurch kann es gelegentlich zu fehlerhaften oder doppelten Einträgen kommen.
+        Sollten dir solche Fehler auffallen, melde dich gerne auf GitHub
+        oder per Mail.
+
+
+        Es werden maximal 20 Einträge zurückgegeben - sortiert nach dem Datum.
+        Der ?skip Parameter kann genutzt werden um weitere Einträge zu laden.
+
     GET /videos
 
-        Videos werden ebenfalls direkt von der pietsmiet.de importiert.
-        Es werden maximal 20 Videos zurückgegeben - sortiert nach dem Uploaddatum.
-        Der ?skip Parameter kann genutzt werden um weitere Videos zu laden.
+        Videos werden direkt von der pietsmiet.de importiert.
 
     GET /twitch
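A quick way to exercise the new endpoint outside the browser (host and port are placeholders for your own deployment; `date` and `skip` mirror the curl examples above):

```python
import requests

BASE_URL = "http://localhost:3000"  # placeholder; use your deployment's URL

resp = requests.get(
    f"{BASE_URL}/api/scheduledContentPieces",
    params={"date": "2024-01-22", "skip": 0},
    timeout=10,
)
resp.raise_for_status()

# The endpoint returns at most 20 entries, newest startDate first.
for piece in resp.json():
    print(piece["startDate"], piece["importedFrom"], piece["title"])
```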
diff --git a/src/psaggregator/src/routes/api/scheduledContentPieces/+server.ts b/src/psaggregator/src/routes/api/scheduledContentPieces/+server.ts
new file mode 100644
index 0000000..8e78872
--- /dev/null
+++ b/src/psaggregator/src/routes/api/scheduledContentPieces/+server.ts
@@ -0,0 +1,57 @@
+import { json } from "@sveltejs/kit";
+import prisma from "$lib/prisma";
+import moment from "moment";
+
+export async function GET({ url }) {
+    let skip = 0;
+    const skipParam = url.searchParams.get("skip");
+    if (skipParam) {
+        try {
+            skip = parseInt(skipParam);
+        } finally {
+            if (!skip) skip = 0;
+        }
+    }
+
+    let date = null;
+    if (url.searchParams.has("date")) {
+        try {
+            date = moment(url.searchParams.get("date"));
+        } finally {
+            if (!date || !date.isValid()) {
+                date = null;
+            }
+        }
+    }
+
+    let data = [];
+
+    if (date) {
+        const upperBound = date.clone().endOf("day").toDate();
+        const lowerBound = date.clone().startOf("day").toDate();
+
+        data = await prisma.scheduledContentPiece.findMany({
+            where: {
+                startDate: {
+                    lt: upperBound,
+                    gt: lowerBound
+                }
+            },
+            orderBy: {
+                startDate: "desc"
+            },
+            skip,
+            take: 20
+        });
+    } else {
+        data = await prisma.scheduledContentPiece.findMany({
+            orderBy: {
+                startDate: "desc"
+            },
+            skip,
+            take: 20
+        });
+    }
+
+    return json(data);
+}
diff --git a/src/psaggregator/src/routes/api/uploadplan/+server.ts b/src/psaggregator/src/routes/api/uploadplan/+server.ts
index dd55696..291eda6 100644
--- a/src/psaggregator/src/routes/api/uploadplan/+server.ts
+++ b/src/psaggregator/src/routes/api/uploadplan/+server.ts
@@ -19,6 +19,9 @@ export async function GET({ url }) {
     const data = await prisma.scheduledContentPiece.findMany({
         where: {
+            importedFrom: {
+                equals: "PietSmietDE"
+            },
             startDate: {
                 lt: upperBound,
                 gt: lowerBound
diff --git a/src/psaggregator/src/routes/motivation/+page.svelte b/src/psaggregator/src/routes/motivation/+page.svelte
index c3df6b5..ee6def9 100644
--- a/src/psaggregator/src/routes/motivation/+page.svelte
+++ b/src/psaggregator/src/routes/motivation/+page.svelte
@@ -1,6 +1,6 @@