Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

release/1.5.0 #31

Merged
merged 6 commits into from
Jan 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/dataimport/.dockerignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,5 @@ geckodriver.exe
.env
.env.*
pietsmietfullvideoimport.py
*.sql
*.zip
4 changes: 3 additions & 1 deletion src/dataimport/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ geckodriver.exe
*.json
openai_test.py
.env
videos.sql
*.sql
__pycache__/
threads.py
test.py
*.zip
3 changes: 2 additions & 1 deletion src/dataimport/hello-cron
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
35 * * * * . /root/project_env.sh; /usr/local/bin/python /app/pietsmietdevideoimporter.py >> /var/log/cron.log 2>&1
*/15 * * * * . /root/project_env.sh; /usr/local/bin/python /app/pietsmietdevideoimporter.py >> /var/log/cron.log 2>&1
5 * * * * . /root/project_env.sh; /usr/local/bin/python /app/youtube.py >> /var/log/cron.log 2>&1
5 * * * * . /root/project_env.sh; /usr/local/bin/python /app/instagram.py >> /var/log/cron.log 2>&1
45 21 * * * . /root/project_env.sh; /usr/local/bin/python /app/instagram.py >> /var/log/cron.log 2>&1
45 10 * * * . /root/project_env.sh; /usr/local/bin/python /app/instagram.py >> /var/log/cron.log 2>&1
*/15 * * * * . /root/project_env.sh; /usr/local/bin/python /app/reddit.py >> /var/log/cron.log 2>&1
*/1 * * * * . /root/project_env.sh; /usr/local/bin/python /app/twitch.py >> /var/log/cron.log 2>&1
10 1 * * * . /root/project_env.sh; /usr/local/bin/python /app/informationopenaianalyze.py >> /var/log/cron.log 2>&1
Expand Down
21 changes: 13 additions & 8 deletions src/dataimport/instagram.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@

console = Console()

# create the cdn directory if it does not exist
if not os.path.exists("/app/cdn/instagram"):
console.log("Creating /app/cdn/instagram directory...", style="bold green")
os.makedirs("/app/cdn/instagram")

USERNAME = os.getenv("INSTAGRAM_USERNAME")
PASSWORD = os.getenv("INSTAGRAM_PASSWORD")
KEY_2FA = os.getenv("INSTAGRAM_2FA_SECRET")
Expand Down Expand Up @@ -123,8 +128,8 @@ async def instagram():
db = Database(url=os.getenv("DATABASE_URL"))
await db.connect()
for user, user_id in user_dict.items():
console.log(f"Fetching last 50 media items for {user}")
last_media = cl.user_medias(user_id, amount=50)
console.log(f"Fetching last 3 media items for {user}")
last_media = cl.user_medias(user_id, amount=3)
console.log(f"Found {len(last_media)} media items for {user}")
for media in last_media:
remote_id = f"{user}_{str(media.id)}"
Expand Down Expand Up @@ -156,10 +161,10 @@ async def instagram():
console.log(f"Downloading thumbnail for {remote_id}")
try:
thumbnail = requests.get(thumbnail_url).content
filename = f"instagram_{uuid4()}.jpg"
with open(f"/app/cdn/{filename}", "wb") as f:
filename = f"{uuid4()}.jpg"
with open(f"/app/cdn/instagram/{filename}", "wb") as f:
f.write(thumbnail)
thumbnail_url = f"/cdn/{filename}"
thumbnail_url = f"/cdn/instagram/{filename}"
except Exception as e:
console.log(f"Error downloading thumbnail: {e}", style="bold red")
continue
Expand All @@ -182,10 +187,10 @@ async def instagram():
console.log(f"Downloading thumbnail for resource {resource.pk}")
try:
thumbnail = requests.get(thumbnail_url).content
filename = f"instagramr_{uuid4()}.jpg"
with open(f"/app/cdn/{filename}", "wb") as f:
filename = f"r_{uuid4()}.jpg"
with open(f"/app/cdn/instagram/{filename}", "wb") as f:
f.write(thumbnail)
thumbnail_url = f"/cdn/{filename}"
thumbnail_url = f"/cdn/instagram/{filename}"
except Exception as e:
console.log(
f"Error downloading thumbnail: {e}", style="bold red"
Expand Down
58 changes: 58 additions & 0 deletions src/dataimport/pietsmietdefullthumbnailimport.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# This file is not actively used in psaggregator.
# It is a script that was used to import all thumbnails from PietSmiet once.
# Other imports only import recent thumbnails.
# This script generates a sql file that can be used to sync all thumbnails in combination with the pietsmietfullvideoimport.py script.

import os
import asyncio
from uuid import uuid4
import requests

from rich.console import Console
from databases import Database


console = Console()


async def stuff() -> None:
    """Download every PietSmietDE video thumbnail and write a SQL mapping file.

    Reads all ``ContentPiece`` rows imported from PietSmietDE that have both a
    ``remoteId`` and an ``imageUri``, downloads each thumbnail into
    ``/app/cdn/psde/<uuid>.jpg`` and finally writes ``psde.sql`` containing one
    ``UPDATE`` statement per successfully downloaded thumbnail.

    A thumbnail that cannot be downloaded or written is logged and skipped, so
    it gets no ``UPDATE`` statement and a single bad URL does not abort the
    whole import.
    """
    console.log("Connecting to database...", style="bold green")
    db = Database(url=os.getenv("DATABASE_URL"))
    await db.connect()

    try:
        query = "SELECT * FROM ContentPiece WHERE importedFrom='PietSmietDE' AND type='PSVideo' AND remoteId IS NOT NULL AND imageUri IS NOT NULL"
        console.log("Fetching all videos...", style="bold green")
        videos = await db.fetch_all(query=query)
    finally:
        # The connection is only needed for the single SELECT above.
        await db.disconnect()

    # The files are written below this directory; make sure it is there.
    os.makedirs("/app/cdn/psde", exist_ok=True)

    # remoteId -> uuid of the thumbnail file that was written for it.
    handled = {}

    for index, video in enumerate(videos):
        remote_id = video.remoteId
        if remote_id in handled:
            continue
        file_id = uuid4()

        console.log(
            f"Fetching thumbnail for {remote_id} ({index})...", style="bold green"
        )

        try:
            response = requests.get(video.imageUri, timeout=30)
            # Do not store an HTTP error page as if it were a JPEG.
            response.raise_for_status()
            with open(f"/app/cdn/psde/{file_id}.jpg", "wb") as f:
                f.write(response.content)
        except (requests.RequestException, OSError) as e:
            console.log(f"Error downloading thumbnail: {e}", style="bold red")
            continue

        # Register the mapping only once the file is really on disk.
        handled[remote_id] = file_id

    console.log("Write mapping to file...", style="bold green")

    update_statements = []
    for handled_id, file_id in handled.items():
        # Double single quotes so a quote inside remoteId cannot break the SQL.
        escaped_id = str(handled_id).replace("'", "''")
        # Each statement needs its own terminator and line; writelines() adds
        # neither.
        update_statements.append(
            f"UPDATE ContentPiece SET imageUri='/cdn/psde/{file_id}.jpg' WHERE remoteId='{escaped_id}';\n"
        )

    with open("psde.sql", "w", encoding="utf-8") as f:
        f.writelines(update_statements)

    console.log("Done!", style="bold green")


# Only start the import when executed as a script, not when imported.
if __name__ == "__main__":
    asyncio.run(stuff())
22 changes: 19 additions & 3 deletions src/dataimport/pietsmietdevideoimporter.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import requests
import os
import json
import asyncio
Expand All @@ -12,6 +13,11 @@

console = Console()

# create the cdn directory if it does not exist
if not os.path.exists("/app/cdn/psde"):
console.log("Creating /app/cdn/psde directory...", style="bold green")
os.makedirs("/app/cdn/psde")


async def stuff() -> asyncio.coroutine:
console.log("Starting...", style="bold green")
Expand Down Expand Up @@ -72,7 +78,7 @@ async def stuff() -> asyncio.coroutine:
uri = f"'{video['short_url']}'"
if video.get("thumbnail"):
try:
imageUri = f"'{video['thumbnail']['variations'][0]['url']}'"
imageUri = f"{video['thumbnail']['variations'][0]['url']}"
except KeyError:
pass
except IndexError:
Expand All @@ -99,7 +105,7 @@ async def stuff() -> asyncio.coroutine:
INSERT INTO ContentPiece (id , remoteId, title, description, additionalInfo, startDate, imageUri, href, duration, importedAt, importedFrom , type) VALUES
('{}', '{}' , '{}' , NULL , NULL , {} , {} , {} , {} , now() , 'PietSmietDE', 'PSVideo');"""
UPDATE_STATEMENT = """
UPDATE ContentPiece SET href={}, imageUri={}, title='{}', duration={} WHERE id='{}';"""
UPDATE ContentPiece SET href={}, title='{}', duration={} WHERE id='{}';"""

console.log("Checking for existing entries...", style="bold green")
for content in data:
Expand All @@ -115,7 +121,6 @@ async def stuff() -> asyncio.coroutine:

query = UPDATE_STATEMENT.format(
content["uri"],
content["imageUri"],
content["title"],
content["duration"],
result[0]["id"],
Expand All @@ -128,6 +133,17 @@ async def stuff() -> asyncio.coroutine:
style="bold yellow",
)

if content["imageUri"] != "NULL":
try:
thumbnail = requests.get(content["imageUri"]).content
filename = f"{uuid4()}.jpg"
with open(f"/app/cdn/psde/{filename}", "wb") as f:
f.write(thumbnail)
content["imageUri"] = f"'/cdn/psde/{filename}'"
except Exception as e:
console.log(f"Error downloading thumbnail: {e}", style="bold red")
content["imageUri"] = f"'{content['imageUri']}'"

query = INSERT_STATEMENT.format(
uuid4(),
content["remoteId"],
Expand Down
24 changes: 23 additions & 1 deletion src/dataimport/reddit.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import requests
import os
import asyncio
from datetime import datetime
from uuid import uuid4

from rich.console import Console
from databases import Database
Expand All @@ -10,6 +12,11 @@

console = Console()

# create the cdn directory if it does not exist
if not os.path.exists("/app/cdn/reddit"):
console.log("Creating /app/cdn/reddit directory...", style="bold green")
os.makedirs("/app/cdn/reddit")


async def stuff() -> asyncio.coroutine:
client_id = os.getenv("REDDIT_CLIENT_ID")
Expand Down Expand Up @@ -52,6 +59,13 @@ async def stuff() -> asyncio.coroutine:
delete_query = "DELETE FROM RedditPost WHERE 1=1;"
await db.execute(delete_query)

console.log("Deleting old thumbnails...", style="bold green")
try:
for file in os.listdir("/app/cdn/reddit"):
os.remove(f"/app/cdn/reddit/{file}")
except Exception as e:
console.log(f"Error deleting old thumbnails: {e}", style="bold red")

INSERT_STATEMENT = """INSERT INTO RedditPost (id , title, description, username, upvotes, comments, sticky, publishedAt, imageUri, href, importedAt) VALUES
('{}', '{}' , NULL , '{}' , {} , {} , {} , '{}' , {} , '{}', now());"""

Expand All @@ -63,7 +77,15 @@ async def stuff() -> asyncio.coroutine:

thumbnail = "NULL"
if submission.thumbnail.startswith("http"):
thumbnail = f"'{submission.thumbnail}'"
try:
thubmnail_content = requests.get(submission.thumbnail).content
thumbnail = f"{uuid4()}.jpg"
with open(f"/app/cdn/reddit/{thumbnail}", "wb") as f:
f.write(thubmnail_content)
thumbnail = f"'/cdn/reddit/{thumbnail}'"
except Exception as e:
console.log(f"Error downloading thumbnail: {e}")
thumbnail = f"'{submission.thumbnail}'"

query = INSERT_STATEMENT.format(
submission.id,
Expand Down
13 changes: 9 additions & 4 deletions src/dataimport/youtube.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@

console = Console()

# create the cdn directory if it does not exist
if not os.path.exists("/app/cdn/yt"):
console.log("Creating /app/cdn/yt directory...", style="bold green")
os.makedirs("/app/cdn/yt")

server_base_url = (
os.getenv("YT_SERVER_BASE_URL")
if os.getenv("YT_SERVER_BASE_URL")
Expand Down Expand Up @@ -79,14 +84,14 @@ async def youtube():
pass

if thumbnailUri != "NULL":
# download thumbnail and store it in /app/cdn/
# download thumbnail and store it in /app/cdn/yt/
console.log(f"Downloading thumbnail for {yt['id']}")
try:
thumbnail = requests.get(thumbnailUri).content
filename = f"youtube_{uuid4()}.jpg"
with open(f"/app/cdn/{filename}", "wb") as f:
filename = f"{uuid4()}.jpg"
with open(f"/app/cdn/yt/{filename}", "wb") as f:
f.write(thumbnail)
thumbnailUri = f"'/cdn/{filename}'"
thumbnailUri = f"'/cdn/yt/{filename}'"
except Exception as e:
console.log(f"Error downloading thumbnail: {e}", style="bold red")
thumbnailUri = "NULL"
Expand Down
2 changes: 2 additions & 0 deletions src/nginx/nginx.conf
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ http {
location ^~ /cdn/ {
alias /app/cdn/;
try_files $uri =404;

expires 1M;
}

location ^~ /api {
Expand Down
4 changes: 2 additions & 2 deletions src/psaggregator/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion src/psaggregator/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "psaggregator",
"version": "1.4.1",
"version": "1.5.0",
"scripts": {
"dev": "vite dev",
"build": "vite build",
Expand Down
Loading