-
-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #18 from zaanposni/14-analyze-youtube-community-posts-with-chatgpt-and-add-streaming-plans-to-database
14 analyze youtube community posts with chatgpt and add streaming plans to database
- Loading branch information
Showing
28 changed files
with
345 additions
and
23 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,3 +11,5 @@ MYSQL_ROOT_PASSWORD=psaggregator | |
|
||
LEGAL_URL= | ||
PUBLIC_KOFI_USERNAME=zaanposni | ||
|
||
OPENAI_API_KEY= |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
# Compose fragment that adds a phpMyAdmin container for inspecting the MySQL database.
# The `db` service and the `mysql` network are referenced but not defined in this
# fragment; presumably they come from the main compose file - confirm.
services:
  phpmyadmin:
    container_name: phpmyadmin
    image: phpmyadmin
    restart: unless-stopped
    ports:
      # Publishes container port 80 on host port 5651, bound to all interfaces.
      - 0.0.0.0:5651:80
    environment:
      - PMA_HOST=db
      - PMA_PORT=3306
      # NOTE(review): root credentials are handed to phpMyAdmin; combined with the
      # 0.0.0.0 binding above this looks like it exposes root DB access to anyone
      # who can reach the port - confirm this file is only used for local development.
      - PMA_USER=root
      - PMA_PASSWORD=${MYSQL_ROOT_PASSWORD}
    depends_on:
      - db
    networks:
      - mysql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,3 +5,5 @@ REDDIT_CLIENT_SECRET= | |
|
||
TWITCH_CLIENT_ID= | ||
TWITCH_CLIENT_SECRET= | ||
|
||
OPENAI_API_KEY= |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,3 +4,4 @@ instagram.py | |
openai_test.py | ||
.env | ||
videos.sql | ||
__pycache__/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,153 @@ | ||
import json | ||
import os | ||
import asyncio | ||
import base64 | ||
from uuid import uuid4 | ||
from datetime import datetime, timedelta | ||
|
||
from rich.console import Console | ||
from databases import Database | ||
from openai import OpenAI | ||
from dateutil.parser import parse | ||
|
||
|
||
# The console is created first so that a missing API key can be reported
# with rich styling.
console = Console()

# Fail fast when no API key is configured: every request below needs it.
_openai_api_key = os.getenv("OPENAI_API_KEY")
if not _openai_api_key:
    # The builtin print() has no `style` keyword and raised TypeError here;
    # Console.print is the call that accepts rich styles.
    console.print("OPENAI_API_KEY not set", style="bold red")
    raise SystemExit(1)

client = OpenAI(api_key=_openai_api_key)
|
||
# Parameterized INSERT for one detected stream. The caller binds :id, :title,
# :description, :additionalInfo and :startDate; every other column is fixed here
# (href points at the pietsmiet Twitch channel, importedFrom is 'OpenAI',
# type is 'TwitchStream', importedAt is the database's now(), the rest are NULL).
INSERT_STATEMENT = """
INSERT INTO ScheduledContentPiece (id, remoteId, title, description, additionalInfo, startDate, imageUri, href, secondaryHref, duration, importedAt, importedFrom, type)
VALUES (:id, NULL, :title, :description, :additionalInfo, :startDate, NULL, 'https://twitch.tv/pietsmiet', NULL, NULL, now(), 'OpenAI', 'TwitchStream')"""
|
||
|
||
def openai_request(file_data) -> dict:
    """Ask the OpenAI vision model to read a streaming plan from an image.

    Args:
        file_data: Base64-encoded image content as text. It is embedded into a
            ``data:image/jpeg;base64,...`` URL and sent as the user message.

    Returns:
        The JSON document returned by the model, decoded into Python objects.
        If the model answers with a bare JSON list instead of an object, the
        list is wrapped as ``{"streams": [...]}`` so the declared ``dict``
        return type always holds.

    Raises:
        ValueError: If the response carries no text content, or the content is
            not valid JSON (``json.JSONDecodeError`` is a ``ValueError``).
    """
    # Collapse the multi-line prompt into a single space-separated line.
    system_prompt = " ".join(
        """
        You are an assistant that generates JSON. You always return just the JSON with no additional description or context.
        The following image might be a streaming plan of a content creator.
        The JSON contains a list of streams. Each stream MUST have a start, title, game, additional_information. Use ISO 8601 format.
        If the image is not a streaming plan, return an empty list of streams.
        Keep in mind that all names, texts and times are in German. There can be multiple streams per day.
        """.split()
    )

    console.log("Sending request to OpenAI...", style="bold green")
    response = client.chat.completions.create(
        model="gpt-4-vision-preview",
        messages=[
            {
                "role": "system",
                "content": [
                    {
                        "type": "text",
                        "text": system_prompt,
                    }
                ],
            },
            {
                "role": "user",
                "content": [
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{file_data}",
                            "detail": "high",
                        },
                    },
                ],
            },
        ],
        max_tokens=2000,
    )

    console.log("Response:", style="bold green")
    console.log(response, style="bold green")

    # The API may return no choices or a message without text content;
    # report that explicitly instead of failing with an opaque
    # AttributeError/IndexError.
    content = response.choices[0].message.content if response.choices else None
    if not content:
        raise ValueError("OpenAI response contained no text content")

    # The model tends to wrap its answer in a Markdown code fence; strip the
    # fence markers (and newlines) before decoding.
    sanitized_string = (
        content.replace("\n", "")
        .replace("```json", "")
        .replace("```", "")
    )
    data = json.loads(sanitized_string)

    # The prompt asks for "a list of streams", so a bare list is a plausible
    # answer; normalize it to the object shape the caller indexes.
    if isinstance(data, list):
        data = {"streams": data}

    console.log("Data:", style="bold green")
    console.log(data, style="bold green")
    return data
|
||
|
||
async def openai():
    """Analyze recent YouTube post images and store the streams found in them.

    Fetches ``Information`` rows that were imported from YouTube within the
    last day, have an image and no ``analyzedAt`` timestamp. Each image is
    sent to ``openai_request``; every returned stream that has ``start``,
    ``title`` and ``game`` is inserted via ``INSERT_STATEMENT`` unless a
    ``TwitchStream`` entry with the same start date already exists.

    Every row is stamped with ``analyzedAt`` whether or not its analysis
    succeeded. A failure on one row is logged and does not stop the remaining
    rows, and the database connection is always closed.
    """
    console.log("Connecting to database...", style="bold green")
    db = Database(url=os.getenv("DATABASE_URL"))
    await db.connect()

    try:
        last_day = datetime.now() - timedelta(days=1)
        # The cutoff is passed as a bound parameter rather than interpolated
        # into the SQL text.
        query = """
            SELECT * FROM Information
            WHERE analyzedAt IS NULL AND
            importedAt > :since AND
            imageUri IS NOT NULL AND
            importedFrom = 'YouTube'"""
        console.log("Fetching data...", style="bold green")

        rows = await db.fetch_all(
            query=query,
            values={"since": last_day.strftime("%Y-%m-%d %H:%M:%S")},
        )
        console.log(f"Fetched {len(rows)} rows", style="bold green")

        for row in rows:
            try:
                console.log(f"Analyzing {row.id}", style="bold green")
                file_path = f"/app{row.imageUri}"
                with open(file_path, "rb") as image_file:
                    file_data = base64.b64encode(image_file.read()).decode("utf-8")

                open_ai_res = openai_request(file_data)
                streams = open_ai_res["streams"]

                console.log(
                    f"Analyzed {row.id} with {len(streams)} streams",
                    style="bold green",
                )

                for stream in streams:
                    if any(x not in stream for x in ["start", "title", "game"]):
                        console.log(
                            f"Missing required fields in stream {stream}",
                            style="bold red",
                        )
                        continue

                    start_date = parse(stream["start"])
                    # A year in the past is moved to the current year.
                    if start_date.year < datetime.now().year:
                        start_date = start_date.replace(year=datetime.now().year)
                    start_date_text = start_date.strftime("%Y-%m-%d %H:%M:%S")

                    # De-duplicate on (type, startDate).
                    query = "SELECT * FROM ScheduledContentPiece WHERE type = :type AND startDate = :startDate"
                    values = {
                        "type": "TwitchStream",
                        "startDate": start_date_text,
                    }
                    existing_stream = await db.fetch_one(query=query, values=values)
                    if existing_stream:
                        console.log(
                            f"Stream {stream['title']} already exists, skipping",
                            style="bold red",
                        )
                        continue

                    console.log(f"Inserting stream {stream['title']}", style="bold green")
                    values = {
                        "id": str(uuid4()),
                        "title": stream["title"],
                        "description": stream["game"],
                        "additionalInfo": stream.get("additional_information"),
                        "startDate": start_date_text,
                    }
                    await db.execute(query=INSERT_STATEMENT, values=values)
            except Exception as exc:
                # Per-row boundary: a missing file, an unparsable model reply
                # or a bad date must not abort the remaining rows.
                console.log(f"Failed to analyze {row.id}: {exc!r}", style="bold red")
            finally:
                # Mark the row as handled even on failure so it is not sent to
                # the API again on the next run.
                console.log(f"Setting analyzedAt for {row.id}", style="bold green")
                query = "UPDATE Information SET analyzedAt = NOW() WHERE id = :id"
                await db.execute(query=query, values={"id": row.id})
    finally:
        # Always release the connection, including when fetching or updating fails.
        await db.disconnect()

    console.log("Done")
|
||
|
||
# Script entry point: runs the analysis coroutine to completion when this module is executed.
asyncio.run(openai())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,3 +8,4 @@ twitchAPI==4.1.0 | |
praw==7.7.1 | ||
rich==12.4.4 | ||
requests | ||
openai==1.7.2 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
2 changes: 2 additions & 0 deletions
2
src/psaggregator/src/config/migrations/20240122170826_add_analyzed_field/migration.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
-- AlterTable | ||
ALTER TABLE `Information` ADD COLUMN `analyzedAt` DATETIME(3) NULL DEFAULT CURRENT_TIMESTAMP(3); |
8 changes: 8 additions & 0 deletions
8
src/psaggregator/src/config/migrations/20240122174253_add_open_ai_import_type/migration.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
-- AlterTable | ||
ALTER TABLE `ContentPiece` MODIFY `importedFrom` ENUM('Unknown', 'PietSmietDE', 'Instagram', 'Twitter', 'Threads', 'Reddit', 'YouTube', 'OpenAI', 'Custom') NOT NULL; | ||
|
||
-- AlterTable | ||
ALTER TABLE `Information` MODIFY `importedFrom` ENUM('Unknown', 'PietSmietDE', 'Instagram', 'Twitter', 'Threads', 'Reddit', 'YouTube', 'OpenAI', 'Custom') NOT NULL; | ||
|
||
-- AlterTable | ||
ALTER TABLE `ScheduledContentPiece` MODIFY `importedFrom` ENUM('Unknown', 'PietSmietDE', 'Instagram', 'Twitter', 'Threads', 'Reddit', 'YouTube', 'OpenAI', 'Custom') NOT NULL; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
<script lang="ts">
    // Extra CSS classes supplied by the parent. `class` is a reserved word, so the
    // variable is named `classes` and exported under the prop name `class`.
    let classes = "";
    export { classes as class };
</script>
|
||
<style>
    /* Fill used when the <html> element carries the `dark` class. */
    :global(html.dark .sparkle) {
        fill: #f9ee00;
    }
    /* Default fill for the sparkle paths. */
    .sparkle {
        fill: #d4af37;
    }
</style>
|
||
<svg width="24" height="24" viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg" class="icon-md {classes}"> | ||
<path | ||
class="sparkle" | ||
d="M19.3975 1.35498C19.3746 1.15293 19.2037 1.00021 19.0004 1C18.7971 0.999793 18.6259 1.15217 18.6026 1.35417C18.4798 2.41894 18.1627 3.15692 17.6598 3.65983C17.1569 4.16274 16.4189 4.47983 15.3542 4.60264C15.1522 4.62593 14.9998 4.79707 15 5.00041C15.0002 5.20375 15.1529 5.37457 15.355 5.39746C16.4019 5.51605 17.1562 5.83304 17.6716 6.33906C18.1845 6.84269 18.5078 7.57998 18.6016 8.63539C18.6199 8.84195 18.7931 9.00023 19.0005 9C19.2078 8.99977 19.3806 8.84109 19.3985 8.6345C19.4883 7.59673 19.8114 6.84328 20.3273 6.32735C20.8433 5.81142 21.5967 5.48834 22.6345 5.39851C22.8411 5.38063 22.9998 5.20782 23 5.00045C23.0002 4.79308 22.842 4.61992 22.6354 4.60157C21.58 4.50782 20.8427 4.18447 20.3391 3.67157C19.833 3.15623 19.516 2.40192 19.3975 1.35498Z" | ||
></path> | ||
<path | ||
class="sparkle" | ||
fill-rule="evenodd" | ||
clip-rule="evenodd" | ||
d="M11 3C11.4833 3 11.8974 3.34562 11.9839 3.82111C12.4637 6.46043 13.279 8.23983 14.5196 9.48039C15.7602 10.721 17.5396 11.5363 20.1789 12.0161C20.6544 12.1026 21 12.5167 21 13C21 13.4833 20.6544 13.8974 20.1789 13.9839C17.5396 14.4637 15.7602 15.279 14.5196 16.5196C13.279 17.7602 12.4637 19.5396 11.9839 22.1789C11.8974 22.6544 11.4833 23 11 23C10.5167 23 10.1026 22.6544 10.0161 22.1789C9.53625 19.5396 8.72096 17.7602 7.48039 16.5196C6.23983 15.279 4.46043 14.4637 1.82111 13.9839C1.34562 13.8974 1 13.4833 1 13C1 12.5167 1.34562 12.1026 1.82111 12.0161C4.46043 11.5363 6.23983 10.721 7.48039 9.48039C8.72096 8.23983 9.53625 6.46043 10.0161 3.82111C10.1026 3.34562 10.5167 3 11 3ZM5.66618 13C6.9247 13.5226 7.99788 14.2087 8.89461 15.1054C9.79134 16.0021 10.4774 17.0753 11 18.3338C11.5226 17.0753 12.2087 16.0021 13.1054 15.1054C14.0021 14.2087 15.0753 13.5226 16.3338 13C15.0753 12.4774 14.0021 11.7913 13.1054 10.8946C12.2087 9.99788 11.5226 8.9247 11 7.66618C10.4774 8.9247 9.79134 9.99788 8.89461 10.8946C7.99788 11.7913 6.9247 12.4774 5.66618 13Z" | ||
></path> | ||
</svg> |
Oops, something went wrong.