diff --git a/src/dataimport/hello-cron b/src/dataimport/hello-cron index 5228a29..25204b6 100644 --- a/src/dataimport/hello-cron +++ b/src/dataimport/hello-cron @@ -5,6 +5,10 @@ 5 * * * * . /root/project_env.sh; /usr/local/bin/python /app/youtube.py >> /var/log/cron.log 2>&1 45 21 * * * . /root/project_env.sh; /usr/local/bin/python /app/instagram.py >> /var/log/cron.log 2>&1 45 10 * * * . /root/project_env.sh; /usr/local/bin/python /app/instagram.py >> /var/log/cron.log 2>&1 +45 09 * * * . /root/project_env.sh; /usr/local/bin/python /app/instagramstory.py >> /var/log/cron.log 2>&1 +45 15 * * * . /root/project_env.sh; /usr/local/bin/python /app/instagramstory.py >> /var/log/cron.log 2>&1 +45 20 * * * . /root/project_env.sh; /usr/local/bin/python /app/instagramstory.py >> /var/log/cron.log 2>&1 +30 * * * * . /root/project_env.sh; /usr/local/bin/python /app/instagramstorydelete.py >> /var/log/cron.log 2>&1 */15 * * * * . /root/project_env.sh; /usr/local/bin/python /app/reddit.py >> /var/log/cron.log 2>&1 */1 * * * * . /root/project_env.sh; /usr/local/bin/python /app/twitch.py >> /var/log/cron.log 2>&1 10 1 * * * . 
"""Import the current Instagram stories of the tracked accounts.

For every account in ``user_dict`` the stories are fetched through instagrapi.
Each new story gets its thumbnail stored below ``CDN_DIRECTORY`` and an
``Information`` row; stories with a video additionally get the video file and
an ``InformationResource`` row. Stories whose ``remoteId`` is already stored
are skipped.
"""
import asyncio
import os
import requests
import time
import random
from uuid import uuid4

from databases import Database
from rich.console import Console
from instagrapi import Client
from instagrapi.exceptions import LoginRequired
import pyotp


console = Console()

# create cdn directory if not exists
CDN_DIRECTORY = "/app/cdn/instagram/"
RELATIVE_VIDEO_BASE_URI = "/cdn/instagram/"
# Upper bound (seconds) for one media download, so that a stalled connection
# cannot block the job forever.
DOWNLOAD_TIMEOUT = 60
if not os.path.exists(CDN_DIRECTORY):
    console.log(f"Creating {CDN_DIRECTORY} directory...", style="bold green")
    os.makedirs(CDN_DIRECTORY)

USERNAME = os.getenv("INSTAGRAM_USERNAME")
PASSWORD = os.getenv("INSTAGRAM_PASSWORD")
KEY_2FA = os.getenv("INSTAGRAM_2FA_SECRET")
CONFIG_PATH = os.getenv("INSTAGRAM_CONFIG_PATH")
if not CONFIG_PATH:
    CONFIG_PATH = "session.json"
console.log(f"Using config path {CONFIG_PATH}")

if not USERNAME or not PASSWORD:
    raise Exception("No Instagram username or password provided")


def _totp_code():
    """Return the current TOTP code for ``KEY_2FA``, or "" if no secret is set."""
    if not KEY_2FA:
        return ""
    # The secret may be configured with spaces between groups; strip them.
    return pyotp.TOTP(KEY_2FA.replace(" ", "")).now()


def login_user():
    """
    Attempts to login to Instagram using either the provided session information
    or the provided username and password.

    Returns:
        The logged-in instagrapi ``Client``; its settings are written back to
        ``CONFIG_PATH`` before returning.

    Raises:
        Exception: if neither the session login nor the password login worked.
    """

    console.log("Attempting to login user...")
    if not os.path.exists(CONFIG_PATH):
        console.log("No session file found, creating empty session file")
        with open(CONFIG_PATH, "w") as f:
            f.write("{}")

    cl = Client()
    session = cl.load_settings(CONFIG_PATH)

    login_via_session = False
    login_via_pw = False

    if session:
        try:
            cl.set_settings(session)
            cl.login(USERNAME, PASSWORD, verification_code=_totp_code())

            # check if session is valid
            try:
                cl.get_timeline_feed()
            except LoginRequired:
                console.log(
                    "Session is invalid, need to login via username and password"
                )

                old_session = cl.get_settings()

                # use the same device uuids across logins
                cl.set_settings({})
                cl.set_uuids(old_session["uuids"])

                cl.login(USERNAME, PASSWORD, verification_code=_totp_code())
            login_via_session = True
        except Exception as e:
            console.log("Couldn't login user using session information: %s" % e)

    if not login_via_session:
        try:
            console.log(
                "Attempting to login via username and password. username: %s" % USERNAME
            )

            if cl.login(USERNAME, PASSWORD, verification_code=_totp_code()):
                login_via_pw = True
        except Exception as e:
            console.log("Couldn't login user using username and password: %s" % e)

    if not login_via_pw and not login_via_session:
        raise Exception("Couldn't login user with either password or session")

    cl.dump_settings(CONFIG_PATH)
    return cl


cl = login_user()
console.log("Successfully logged in user")

# NOTE(review): the random pauses in this script presumably space out requests
# to Instagram - confirm the intended range with the author.
time.sleep(random.randint(10, 30))

user_dict = {
    "peter": 344058897,
    "brammen": 1588473759,
    "jay": 2030403724,
    "sep": 1609561808,
    "chris": 1057433625,
}

console.log("Fetching current stories for each user")

INSERT_QUERY_INFORMATION = """
    INSERT INTO Information (id, remoteId, text, additionalInfo, imageUri, href, date, analyzedAt, importedAt, importedFrom)
    VALUES (:id, :remoteId, :text, :additionalInfo, :imageUri, :href, :date, NULL, now(), 'InstagramStory')"""
INSERT_QUERY_RESOURCE = """
    INSERT INTO InformationResource (id, remoteId, informationId, imageUri, videoUri, videoDuration, importedAt, importedFrom)
    VALUES (:id, :remoteId, :informationId, :imageUri, :videoUri, :videoDuration, now(), 'InstagramStory')"""
SELECT_QUERY_INFORMATION = """
    SELECT id FROM Information WHERE remoteId = :remoteId AND importedFrom = 'InstagramStory'"""


def _download_to_cdn(url, filename):
    """Download ``url`` into ``CDN_DIRECTORY`` and return the file's relative URI.

    Raises:
        requests.RequestException: on connection errors, timeouts or a
            non-success HTTP status.
        OSError: if the file cannot be written.
    """
    response = requests.get(url, timeout=DOWNLOAD_TIMEOUT)
    # Without this check an HTTP error page would be saved as if it were media.
    response.raise_for_status()
    with open(os.path.join(CDN_DIRECTORY, filename), "wb") as f:
        f.write(response.content)
    return f"{RELATIVE_VIDEO_BASE_URI}{filename}"


async def _import_story(db, user, story):
    """Store one story (thumbnail, row, optional video) unless already imported."""
    remote_id = f"{user}_{story.id}"
    media_db_id = uuid4()
    console.log(f"Processing story item {remote_id}")
    if await db.fetch_one(SELECT_QUERY_INFORMATION, {"remoteId": remote_id}):
        console.log(f"Media item {remote_id} already in database", style="bold red")
        return
    console.log(f"Media item {remote_id} not in database, inserting")

    if not story.thumbnail_url:
        console.log(
            f"Media item {remote_id} has no thumbnail, skipping",
            style="bold red",
        )
        return

    console.log(f"Downloading thumbnail for {remote_id}")
    try:
        thumbnail_uri = _download_to_cdn(story.thumbnail_url, f"{uuid4()}.jpg")
    except Exception as e:
        console.log(f"Error downloading thumbnail: {e}", style="bold red")
        return

    await db.execute(
        INSERT_QUERY_INFORMATION,
        {
            "id": media_db_id,
            "remoteId": remote_id,
            "text": user,
            "additionalInfo": user,
            "imageUri": thumbnail_uri,
            "href": f"https://www.instagram.com/{story.user.username}",
            "date": story.taken_at.strftime("%Y-%m-%d %H:%M:%S"),
        },
    )

    if not story.video_url:
        return

    time.sleep(random.randint(10, 30))

    console.log(f"Downloading video for story resource {story.pk}")
    try:
        video_uri = _download_to_cdn(story.video_url, f"r_{uuid4()}.mp4")
    except Exception as e:
        # NOTE(review): the Information row is already stored at this point, so
        # this story is treated as imported on the next run and the video is
        # not retried.
        console.log(f"Error downloading video: {e}", style="bold red")
        return

    await db.execute(
        INSERT_QUERY_RESOURCE,
        {
            "id": uuid4(),
            "remoteId": str(story.pk),
            "informationId": media_db_id,
            "imageUri": thumbnail_uri,
            "videoUri": video_uri,
            "videoDuration": story.video_duration,
        },
    )


async def instagram():
    """Import the stories of every account in ``user_dict`` into the database."""
    console.log("Connecting to database...", style="bold green")
    db = Database(os.getenv("DATABASE_URL"))
    await db.connect()
    try:
        for user, user_id in user_dict.items():
            console.log(f"Fetching stories for {user}")
            stories = cl.user_stories(user_id)
            time.sleep(random.randint(10, 30))
            console.log(f"Found {len(stories)} stories for {user}")
            for story in stories:
                await _import_story(db, user, story)
    finally:
        # Release the connection even if an import step raised.
        await db.disconnect()


asyncio.run(instagram())
"""Delete imported Instagram stories that are more than one day old.

Selects every ``Information`` row imported from ``InstagramStory`` whose
``date`` is older than one day, removes the image/video files referenced by
the row from disk and then deletes the row.
"""
import asyncio
import os
from datetime import datetime, timedelta

from databases import Database
from rich.console import Console


console = Console()

# Nothing to clean up when the CDN directory is missing, so stop right away.
CDN_DIRECTORY = "/app/cdn/instagram/"
# Prefix that is put in front of the stored imageUri/videoUri values to get
# the path of the file on disk.
RELATIVE_CDN_DIRECTORY = "/app"
if not os.path.exists(CDN_DIRECTORY):
    console.log(f"{CDN_DIRECTORY} does not exist, exiting", style="bold red")
    exit()

DELETE_QUERY_INFORMATION = "DELETE FROM Information WHERE id = :id"
# NOTE(review): this is a naive local timestamp; confirm that the stored
# Information.date values are written using the same clock/timezone.
one_day_ago = datetime.now() - timedelta(days=1)
SELECT_QUERY_INFORMATION = "SELECT Information.id, Information.imageUri, InformationResource.videoUri FROM Information LEFT JOIN InformationResource ON Information.id = InformationResource.informationId WHERE Information.importedFrom = 'InstagramStory' AND Information.date < :one_day_ago"


def _remove_local_file(uri):
    """Remove the file behind a stored URI.

    Args:
        uri: value of ``imageUri``/``videoUri``; may be ``None`` because the
            LEFT JOIN yields no ``videoUri`` for stories without a resource.

    Returns:
        ``True`` when the file is gone afterwards (removed, already missing,
        or there was no URI at all), ``False`` when removal failed for another
        reason.
    """
    if not uri:
        return True
    path = f"{RELATIVE_CDN_DIRECTORY}{uri}"
    try:
        os.remove(path)
    except FileNotFoundError:
        # Already gone (e.g. removed by an earlier, partially failed run);
        # this must not keep the database row alive forever.
        console.log(f"Local file {path} not found", style="yellow")
    except Exception as e:
        console.log(f"Error deleting local file {path}", style="red")
        console.log(e)
        return False
    return True


async def instagram():
    """Remove files and database rows of all stories older than one day."""
    console.log("Connecting to database...", style="bold green")
    db = Database(os.getenv("DATABASE_URL"))
    await db.connect()
    try:
        console.log("Fetching stories for deletion")
        stories = await db.fetch_all(
            SELECT_QUERY_INFORMATION, {"one_day_ago": one_day_ago}
        )
        console.log(f"Found {len(stories)} stories to delete")

        for story in stories:
            console.log(f"Try deleting local files for story {story.id}")
            image_gone = _remove_local_file(story.imageUri)
            video_gone = _remove_local_file(story.videoUri)
            if not (image_gone and video_gone):
                # Keep the row so the next run can retry the file removal.
                continue
            console.log(f"Deleted local files for story {story.id}")

            console.log(f"Deleting story {story.id} from database", style="red")
            await db.execute(DELETE_QUERY_INFORMATION, {"id": story.id})
            console.log(f"Deleted story {story.id} from database")
    finally:
        # Release the connection even if a query raised.
        await db.disconnect()

    console.log("Done")


asyncio.run(instagram())
'Twitter', 'Threads', 'Reddit', 'YouTube', 'OpenAI', 'Custom') NOT NULL; diff --git a/src/psaggregator/src/config/migrations/20240204113314_save_instagram_story_duration/migration.sql b/src/psaggregator/src/config/migrations/20240204113314_save_instagram_story_duration/migration.sql new file mode 100644 index 0000000..9e1db61 --- /dev/null +++ b/src/psaggregator/src/config/migrations/20240204113314_save_instagram_story_duration/migration.sql @@ -0,0 +1,2 @@ +-- AlterTable +ALTER TABLE `InformationResource` ADD COLUMN `videoDuration` INTEGER NULL; diff --git a/src/psaggregator/src/config/schema.prisma b/src/psaggregator/src/config/schema.prisma index 789a56c..1f67ef0 100644 --- a/src/psaggregator/src/config/schema.prisma +++ b/src/psaggregator/src/config/schema.prisma @@ -17,6 +17,7 @@ enum ImportType { Unknown PietSmietDE Instagram + InstagramStory Twitter Threads Reddit @@ -78,6 +79,7 @@ model InformationResource { informationId String imageUri String? @db.VarChar(1024) videoUri String? @db.VarChar(1024) + videoDuration Int? importedAt DateTime importedFrom ImportType } diff --git a/src/psaggregator/src/lib/components/Changelog.svelte b/src/psaggregator/src/lib/components/Changelog.svelte index db42e86..ae365c1 100644 --- a/src/psaggregator/src/lib/components/Changelog.svelte +++ b/src/psaggregator/src/lib/components/Changelog.svelte @@ -30,13 +30,11 @@ >Ein einzelnes Video konnte aufgrund eines selten Fehlers letzte Woche nicht importiert werden. Diesen Fehler habe ich behoben. +