From f2b5f3a861e231f001de0e98df7eae6b756db1f0 Mon Sep 17 00:00:00 2001 From: zaanposni Date: Sat, 27 Jan 2024 17:30:28 +0100 Subject: [PATCH 1/6] fixes #29 --- src/nginx/nginx.conf | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/nginx/nginx.conf b/src/nginx/nginx.conf index 03308e4..3551f29 100644 --- a/src/nginx/nginx.conf +++ b/src/nginx/nginx.conf @@ -30,6 +30,8 @@ http { location ^~ /cdn/ { alias /app/cdn/; try_files $uri =404; + + expires 1M; } location ^~ /api { From bfea088158a5d9dc891c1bc534c6d68bac565c5f Mon Sep 17 00:00:00 2001 From: zaanposni Date: Sat, 27 Jan 2024 18:31:35 +0100 Subject: [PATCH 2/6] reduce number of instagram imports --- src/dataimport/hello-cron | 3 ++- src/dataimport/instagram.py | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/dataimport/hello-cron b/src/dataimport/hello-cron index 52268d7..5228a29 100644 --- a/src/dataimport/hello-cron +++ b/src/dataimport/hello-cron @@ -3,7 +3,8 @@ 35 * * * * . /root/project_env.sh; /usr/local/bin/python /app/pietsmietdevideoimporter.py >> /var/log/cron.log 2>&1 */15 * * * * . /root/project_env.sh; /usr/local/bin/python /app/pietsmietdevideoimporter.py >> /var/log/cron.log 2>&1 5 * * * * . /root/project_env.sh; /usr/local/bin/python /app/youtube.py >> /var/log/cron.log 2>&1 -5 * * * * . /root/project_env.sh; /usr/local/bin/python /app/instagram.py >> /var/log/cron.log 2>&1 +45 21 * * * . /root/project_env.sh; /usr/local/bin/python /app/instagram.py >> /var/log/cron.log 2>&1 +45 10 * * * . /root/project_env.sh; /usr/local/bin/python /app/instagram.py >> /var/log/cron.log 2>&1 */15 * * * * . /root/project_env.sh; /usr/local/bin/python /app/reddit.py >> /var/log/cron.log 2>&1 */1 * * * * . /root/project_env.sh; /usr/local/bin/python /app/twitch.py >> /var/log/cron.log 2>&1 10 1 * * * . /root/project_env.sh; /usr/local/bin/python /app/informationopenaianalyze.py >> /var/log/cron.log 2>&1 diff --git a/src/dataimport/instagram.py b/src/dataimport/instagram.py index 15d2efe..5bdc5bb 100644 --- a/src/dataimport/instagram.py +++ b/src/dataimport/instagram.py @@ -123,8 +123,8 @@ async def instagram(): db = Database(url=os.getenv("DATABASE_URL")) await db.connect() for user, user_id in user_dict.items(): - console.log(f"Fetching last 50 media items for {user}") - last_media = cl.user_medias(user_id, amount=50) + console.log(f"Fetching last 3 media items for {user}") + last_media = cl.user_medias(user_id, amount=3) console.log(f"Found {len(last_media)} media items for {user}") for media in last_media: remote_id = f"{user}_{str(media.id)}" From 5a3ac963b5b68ec2a507bc473678bc4063a796b7 Mon Sep 17 00:00:00 2001 From: zaanposni Date: Sat, 27 Jan 2024 19:20:57 +0100 Subject: [PATCH 3/6] import thumbnail --- .../pietsmietdefullthumbnailimport.py | 58 +++++++++++++++++++ src/dataimport/pietsmietdevideoimporter.py | 14 ++++- 2 files changed, 70 insertions(+), 2 deletions(-) create mode 100644 src/dataimport/pietsmietdefullthumbnailimport.py diff --git a/src/dataimport/pietsmietdefullthumbnailimport.py b/src/dataimport/pietsmietdefullthumbnailimport.py new file mode 100644 index 0000000..2038346 --- /dev/null +++ b/src/dataimport/pietsmietdefullthumbnailimport.py @@ -0,0 +1,58 @@ +# This file is not actively used in psaggregator. +# It is a script that was used to import all thumbnails from PietSmiet once. +# Other imports only import recent thumbnails. +# This script generates a sql file that can be used to sync all thumbnails in combination with the pietsmietfullvideoimport.py script. + +import os +import asyncio +from uuid import uuid4 +import requests + +from rich.console import Console +from databases import Database + + +console = Console() + + +async def stuff() -> asyncio.coroutine: + console.log("Connecting to database...", style="bold green") + db = Database(url=os.getenv("DATABASE_URL")) + await db.connect() + + handled = dict() + + query = "SELECT * FROM ContentPiece WHERE importedFrom='PietSmietDE' AND type='PSVideo' AND remoteId IS NOT NULL AND imageUri IS NOT NULL" + console.log("Fetching all videos...", style="bold green") + videos = await db.fetch_all(query=query) + + for index, video in enumerate(videos): + if video.remoteId in handled: + continue + handled[video.remoteId] = uuid4() + + console.log( + f"Fetching thumbnail for {video.remoteId} ({index})...", style="bold green" + ) + + thumbnail = requests.get(video.imageUri).content + filename = f"/app/cdn/psde/{handled[video.remoteId]}.jpg" + with open(filename, "wb") as f: + f.write(thumbnail) + + console.log("Write mapping to file...", style="bold green") + + update_statements = list() + + for handledId, uuid in handled.items(): + update_statements.append( + f"UPDATE ContentPiece SET imageUri='/cdn/psde/{uuid}.jpg' WHERE remoteId='{handledId}'" + ) + + with open("psde.sql", "w", encoding="utf-8") as f: + f.writelines(update_statements) + + console.log("Done!", style="bold green") + + +asyncio.run(stuff()) diff --git a/src/dataimport/pietsmietdevideoimporter.py b/src/dataimport/pietsmietdevideoimporter.py index b3e555b..359cb1c 100644 --- a/src/dataimport/pietsmietdevideoimporter.py +++ b/src/dataimport/pietsmietdevideoimporter.py @@ -1,3 +1,4 @@ +import requests import os import json import asyncio @@ -99,7 +100,7 @@ async def stuff() -> asyncio.coroutine: INSERT INTO ContentPiece (id , remoteId, title, description, additionalInfo, startDate, imageUri, href, duration, importedAt, importedFrom , type) VALUES ('{}', '{}' , '{}' , NULL , NULL , {} , {} , {} , {} , now() , 'PietSmietDE', 'PSVideo');""" UPDATE_STATEMENT = """ - UPDATE ContentPiece SET href={}, imageUri={}, title='{}', duration={} WHERE id='{}';""" + UPDATE ContentPiece SET href={}, title='{}', duration={} WHERE id='{}';""" console.log("Checking for existing entries...", style="bold green") for content in data: @@ -115,7 +116,6 @@ async def stuff() -> asyncio.coroutine: query = UPDATE_STATEMENT.format( content["uri"], - content["imageUri"], content["title"], content["duration"], result[0]["id"], @@ -128,6 +128,16 @@ async def stuff() -> asyncio.coroutine: style="bold yellow", ) + if content["imageUri"] != "NULL": + try: + thumbnail = requests.get(content["imageUri"]).content + filename = f"{uuid4()}.jpg" + with open(f"/app/cdn/psde/{filename}", "wb") as f: + f.write(thumbnail) + content["imageUri"] = f"/cdn/psde/{filename}" + except Exception as e: + console.log(f"Error downloading thumbnail: {e}", style="bold red") + query = INSERT_STATEMENT.format( uuid4(), content["remoteId"], From d6c32a04ab350e3a1db2ee5a1ce775b8bc92c322 Mon Sep 17 00:00:00 2001 From: zaanposni Date: Sat, 27 Jan 2024 19:25:07 +0100 Subject: [PATCH 4/6] download reddit thumbnails --- src/dataimport/reddit.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/src/dataimport/reddit.py b/src/dataimport/reddit.py index 3b59519..1867d4d 100644 --- a/src/dataimport/reddit.py +++ b/src/dataimport/reddit.py @@ -1,6 +1,8 @@ +import requests import os import asyncio from datetime import datetime +from uuid import uuid4 from rich.console import Console from databases import Database @@ -52,6 +54,13 @@ async def stuff() -> asyncio.coroutine: delete_query = "DELETE FROM RedditPost WHERE 1=1;" await db.execute(delete_query) + console.log("Deleting old thumbnails...", style="bold green") + try: + for file in os.listdir("/app/cdn/reddit"): + os.remove(f"/app/cdn/reddit/{file}") + except Exception as e: + console.log(f"Error deleting old thumbnails: {e}", style="bold red") + INSERT_STATEMENT = """INSERT INTO RedditPost (id , title, description, username, upvotes, comments, sticky, publishedAt, imageUri, href, importedAt) VALUES ('{}', '{}' , NULL , '{}' , {} , {} , {} , '{}' , {} , '{}', now());""" @@ -63,7 +72,15 @@ async def stuff() -> asyncio.coroutine: thumbnail = "NULL" if submission.thumbnail.startswith("http"): - thumbnail = f"'{submission.thumbnail}'" + try: + thubmnail_content = requests.get(submission.thumbnail).content + thumbnail = f"'{uuid4()}.jpg'" + with open(f"/app/cdn/reddit/{thumbnail}", "wb") as f: + f.write(thubmnail_content) + thumbnail = f"'/cdn/reddit/{thumbnail}'" + except Exception as e: + console.log(f"Error downloading thumbnail: {e}") + thumbnail = f"'{submission.thumbnail}'" query = INSERT_STATEMENT.format( submission.id, From 36c92eac65038a00897ac6f44cf89b686e6606fa Mon Sep 17 00:00:00 2001 From: zaanposni Date: Sat, 27 Jan 2024 22:19:41 +0100 Subject: [PATCH 5/6] . --- src/dataimport/.dockerignore | 2 ++ src/dataimport/.gitignore | 4 +++- src/dataimport/instagram.py | 17 +++++++++++------ src/dataimport/pietsmietdevideoimporter.py | 10 ++++++++-- src/dataimport/reddit.py | 7 ++++++- src/dataimport/youtube.py | 13 +++++++++---- 6 files changed, 39 insertions(+), 14 deletions(-) diff --git a/src/dataimport/.dockerignore b/src/dataimport/.dockerignore index b8d18da..9b8ce24 100644 --- a/src/dataimport/.dockerignore +++ b/src/dataimport/.dockerignore @@ -3,3 +3,5 @@ geckodriver.exe .env .env.* pietsmietfullvideoimport.py +*.sql +*.zip diff --git a/src/dataimport/.gitignore b/src/dataimport/.gitignore index 1b1d690..6c31534 100644 --- a/src/dataimport/.gitignore +++ b/src/dataimport/.gitignore @@ -2,6 +2,8 @@ geckodriver.exe *.json openai_test.py .env -videos.sql +*.sql __pycache__/ threads.py +test.py +*.zip diff --git a/src/dataimport/instagram.py b/src/dataimport/instagram.py index 5bdc5bb..f124f67 100644 --- a/src/dataimport/instagram.py +++ b/src/dataimport/instagram.py @@ -12,6 +12,11 @@ console = Console() +# create cdn directory if not exists +if not os.path.exists("/app/cdn/instagram"): + console.log("Creating /app/cdn/instagram directory...", style="bold green") + os.makedirs("/app/cdn/instagram") + USERNAME = os.getenv("INSTAGRAM_USERNAME") PASSWORD = os.getenv("INSTAGRAM_PASSWORD") KEY_2FA = os.getenv("INSTAGRAM_2FA_SECRET") @@ -156,10 +161,10 @@ async def instagram(): console.log(f"Downloading thumbnail for {remote_id}") try: thumbnail = requests.get(thumbnail_url).content - filename = f"instagram_{uuid4()}.jpg" - with open(f"/app/cdn/{filename}", "wb") as f: + filename = f"{uuid4()}.jpg" + with open(f"/app/cdn/instagram/{filename}", "wb") as f: f.write(thumbnail) - thumbnail_url = f"/cdn/{filename}" + thumbnail_url = f"/cdn/instagram/{filename}" except Exception as e: console.log(f"Error downloading thumbnail: {e}", style="bold red") continue @@ -182,10 +187,10 @@ async def instagram(): console.log(f"Downloading thumbnail for resource {resource.pk}") try: thumbnail = requests.get(thumbnail_url).content - filename = f"instagramr_{uuid4()}.jpg" - with open(f"/app/cdn/{filename}", "wb") as f: + filename = f"r_{uuid4()}.jpg" + with open(f"/app/cdn/instagram/{filename}", "wb") as f: f.write(thumbnail) - thumbnail_url = f"/cdn/{filename}" + thumbnail_url = f"/cdn/instagram/{filename}" except Exception as e: console.log( f"Error downloading thumbnail: {e}", style="bold red" diff --git a/src/dataimport/pietsmietdevideoimporter.py b/src/dataimport/pietsmietdevideoimporter.py index 359cb1c..bb8da2d 100644 --- a/src/dataimport/pietsmietdevideoimporter.py +++ b/src/dataimport/pietsmietdevideoimporter.py @@ -13,6 +13,11 @@ console = Console() +# create cdn directory if not exists +if not os.path.exists("/app/cdn/psde"): + console.log("Creating /app/cdn/psde directory...", style="bold green") + os.makedirs("/app/cdn/psde") + async def stuff() -> asyncio.coroutine: console.log("Starting...", style="bold green") @@ -73,7 +78,7 @@ async def stuff() -> asyncio.coroutine: uri = f"'{video['short_url']}'" if video.get("thumbnail"): try: - imageUri = f"'{video['thumbnail']['variations'][0]['url']}'" + imageUri = f"{video['thumbnail']['variations'][0]['url']}" except KeyError: pass except IndexError: @@ -134,9 +139,10 @@ async def stuff() -> asyncio.coroutine: filename = f"{uuid4()}.jpg" with open(f"/app/cdn/psde/{filename}", "wb") as f: f.write(thumbnail) - content["imageUri"] = f"/cdn/psde/{filename}" + content["imageUri"] = f"'/cdn/psde/{filename}'" except Exception as e: console.log(f"Error downloading thumbnail: {e}", style="bold red") + content["imageUri"] = f"'{content['imageUri']}'" query = INSERT_STATEMENT.format( uuid4(), diff --git a/src/dataimport/reddit.py b/src/dataimport/reddit.py index 1867d4d..4634de8 100644 --- a/src/dataimport/reddit.py +++ b/src/dataimport/reddit.py @@ -12,6 +12,11 @@ console = Console() +# create cdn directory if not exists +if not os.path.exists("/app/cdn/reddit"): + console.log("Creating /app/cdn/reddit directory...", style="bold green") + os.makedirs("/app/cdn/reddit") + async def stuff() -> asyncio.coroutine: client_id = os.getenv("REDDIT_CLIENT_ID") @@ -74,7 +79,7 @@ async def stuff() -> asyncio.coroutine: if submission.thumbnail.startswith("http"): try: thubmnail_content = requests.get(submission.thumbnail).content - thumbnail = f"'{uuid4()}.jpg'" + thumbnail = f"{uuid4()}.jpg" with open(f"/app/cdn/reddit/{thumbnail}", "wb") as f: f.write(thubmnail_content) thumbnail = f"'/cdn/reddit/{thumbnail}'" diff --git a/src/dataimport/youtube.py b/src/dataimport/youtube.py index 8e8e43e..1951220 100644 --- a/src/dataimport/youtube.py +++ b/src/dataimport/youtube.py @@ -11,6 +11,11 @@ console = Console() +# create cdn directory if not exists +if not os.path.exists("/app/cdn/yt"): + console.log("Creating /app/cdn/yt directory...", style="bold green") + os.makedirs("/app/cdn/yt") + server_base_url = ( os.getenv("YT_SERVER_BASE_URL") if os.getenv("YT_SERVER_BASE_URL") @@ -79,14 +84,14 @@ async def youtube(): pass if thumbnailUri != "NULL": - # download thumbnail and store it in /app/cdn/ + # download thumbnail and store it in /app/cdn/yt/ console.log(f"Downloading thumbnail for {yt['id']}") try: thumbnail = requests.get(thumbnailUri).content - filename = f"youtube_{uuid4()}.jpg" - with open(f"/app/cdn/{filename}", "wb") as f: + filename = f"{uuid4()}.jpg" + with open(f"/app/cdn/yt/{filename}", "wb") as f: f.write(thumbnail) - thumbnailUri = f"'/cdn/{filename}'" + thumbnailUri = f"'/cdn/yt/{filename}'" except Exception as e: console.log(f"Error downloading thumbnail: {e}", style="bold red") thumbnailUri = "NULL" From bc04ab6d09f2a3a047192db052f3fe95cd2bf0b0 Mon Sep 17 00:00:00 2001 From: zaanposni Date: Sat, 27 Jan 2024 22:20:09 +0100 Subject: [PATCH 6/6] bump version --- src/psaggregator/package-lock.json | 4 ++-- src/psaggregator/package.json | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/psaggregator/package-lock.json b/src/psaggregator/package-lock.json index 5d1a4e1..0ff627c 100644 --- a/src/psaggregator/package-lock.json +++ b/src/psaggregator/package-lock.json @@ -1,12 +1,12 @@ { "name": "psaggregator", - "version": "1.4.1", + "version": "1.5.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "psaggregator", - "version": "1.4.1", + "version": "1.5.0", "devDependencies": { "@fontsource/fira-mono": "^4.5.10", "@neoconfetti/svelte": "^1.0.0", diff --git a/src/psaggregator/package.json b/src/psaggregator/package.json index 53edce0..a3fa484 100644 --- a/src/psaggregator/package.json +++ b/src/psaggregator/package.json @@ -1,6 +1,6 @@ { "name": "psaggregator", - "version": "1.4.1", + "version": "1.5.0", "scripts": { "dev": "vite dev", "build": "vite build",