Skip to content

Commit

Permalink
Merge pull request #41 from zaanposni/38-add-instagram-story-import
Browse files Browse the repository at this point in the history
38 add instagram story import
  • Loading branch information
zaanposni authored Feb 4, 2024
2 parents 82173c7 + a5f468d commit 7378f38
Show file tree
Hide file tree
Showing 19 changed files with 591 additions and 11 deletions.
4 changes: 4 additions & 0 deletions src/dataimport/hello-cron
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@
5 * * * * . /root/project_env.sh; /usr/local/bin/python /app/youtube.py >> /var/log/cron.log 2>&1
45 21 * * * . /root/project_env.sh; /usr/local/bin/python /app/instagram.py >> /var/log/cron.log 2>&1
45 10 * * * . /root/project_env.sh; /usr/local/bin/python /app/instagram.py >> /var/log/cron.log 2>&1
45 09 * * * . /root/project_env.sh; /usr/local/bin/python /app/instagramstory.py >> /var/log/cron.log 2>&1
45 15 * * * . /root/project_env.sh; /usr/local/bin/python /app/instagramstory.py >> /var/log/cron.log 2>&1
45 20 * * * . /root/project_env.sh; /usr/local/bin/python /app/instagramstory.py >> /var/log/cron.log 2>&1
30 * * * * . /root/project_env.sh; /usr/local/bin/python /app/instagramstorydelete.py >> /var/log/cron.log 2>&1
*/15 * * * * . /root/project_env.sh; /usr/local/bin/python /app/reddit.py >> /var/log/cron.log 2>&1
*/1 * * * * . /root/project_env.sh; /usr/local/bin/python /app/twitch.py >> /var/log/cron.log 2>&1
10 1 * * * . /root/project_env.sh; /usr/local/bin/python /app/informationopenaianalyze.py >> /var/log/cron.log 2>&1
Expand Down
213 changes: 213 additions & 0 deletions src/dataimport/instagramstory.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,213 @@
import asyncio
import os
import requests
import time
import random
from uuid import uuid4

from databases import Database
from rich.console import Console
from instagrapi import Client
from instagrapi.exceptions import LoginRequired
import pyotp


console = Console()

# Story media is downloaded into CDN_DIRECTORY; RELATIVE_VIDEO_BASE_URI is the
# prefix of the URIs that get stored in the database for those files.
CDN_DIRECTORY = "/app/cdn/instagram/"
RELATIVE_VIDEO_BASE_URI = "/cdn/instagram/"
if not os.path.exists(CDN_DIRECTORY):
    console.log(f"Creating {CDN_DIRECTORY} directory...", style="bold green")
    os.makedirs(CDN_DIRECTORY)

# Credentials and the optional TOTP secret are read from the environment.
USERNAME = os.getenv("INSTAGRAM_USERNAME")
PASSWORD = os.getenv("INSTAGRAM_PASSWORD")
KEY_2FA = os.getenv("INSTAGRAM_2FA_SECRET")

# An unset or empty INSTAGRAM_CONFIG_PATH falls back to "session.json".
CONFIG_PATH = os.getenv("INSTAGRAM_CONFIG_PATH") or "session.json"
console.log(f"Using config path {CONFIG_PATH}")

# Fail fast: nothing below can work without both credentials.
if not (USERNAME and PASSWORD):
    raise Exception("No Instagram username or password provided")


def login_user():
    """
    Log in to Instagram and return the authenticated client.

    A stored session from CONFIG_PATH is tried first; if that does not
    succeed, a plain username/password login is attempted. When KEY_2FA is
    set, a TOTP code is generated for every login call. On success the
    client settings are written back to CONFIG_PATH.

    Raises:
        Exception: if neither the session nor the password login succeeded.
    """

    def current_2fa_code():
        # A fresh code is computed for each login attempt; without a
        # configured secret an empty verification code is passed.
        if KEY_2FA:
            return pyotp.TOTP(KEY_2FA.replace(" ", "")).now()
        return ""

    console.log("Attempting to login user...")
    if not os.path.exists(CONFIG_PATH):
        # Create an empty JSON settings file so load_settings has something to read.
        console.log("No session file found, creating empty session file")
        with open(CONFIG_PATH, "w") as session_file:
            session_file.write("{}")

    cl = Client()
    stored_settings = cl.load_settings(CONFIG_PATH)

    session_ok = False
    password_ok = False

    if stored_settings:
        try:
            cl.set_settings(stored_settings)
            cl.login(USERNAME, PASSWORD, verification_code=current_2fa_code())

            # Probe the session with a real request to see whether it is valid.
            try:
                cl.get_timeline_feed()
            except LoginRequired:
                console.log(
                    "Session is invalid, need to login via username and password"
                )

                previous_settings = cl.get_settings()

                # Reset the settings but keep the same device uuids across logins.
                cl.set_settings({})
                cl.set_uuids(previous_settings["uuids"])

                cl.login(USERNAME, PASSWORD, verification_code=current_2fa_code())
            session_ok = True
        except Exception as e:
            console.log("Couldn't login user using session information: %s" % e)

    if not session_ok:
        try:
            console.log(
                "Attempting to login via username and password. username: %s" % USERNAME
            )
            if cl.login(USERNAME, PASSWORD, verification_code=current_2fa_code()):
                password_ok = True
        except Exception as e:
            console.log("Couldn't login user using username and password: %s" % e)

    if not (password_ok or session_ok):
        raise Exception("Couldn't login user with either password or session")

    # Persist the (possibly refreshed) settings for the next run.
    cl.dump_settings(CONFIG_PATH)
    return cl


# Log in once at start-up; the import coroutine below uses this client.
cl = login_user()
console.log("Successfully logged in user")

# Random pause between the login and the first story request
# (presumably to avoid bot-like request bursts).
time.sleep(random.randint(10, 30))

# Short name -> numeric Instagram user id of every account whose stories are
# imported. The short name is also used as the story text and in the remoteId.
user_dict = {
    "peter": 344058897,
    "brammen": 1588473759,
    "jay": 2030403724,
    "sep": 1609561808,
    "chris": 1057433625,
}

# This script imports stories, not the last 50 posts as the message
# copied from the post importer used to claim.
console.log("Fetching current stories for each user")

# One Information row per story ...
INSERT_QUERY_INFORMATION = """
INSERT INTO Information (id, remoteId, text, additionalInfo, imageUri, href, date, analyzedAt, importedAt, importedFrom)
VALUES (:id, :remoteId, :text, :additionalInfo, :imageUri, :href, :date, NULL, now(), 'InstagramStory')"""
# ... plus one InformationResource row for a story video.
INSERT_QUERY_RESOURCE = """
INSERT INTO InformationResource (id, remoteId, informationId, imageUri, videoUri, videoDuration, importedAt, importedFrom)
VALUES (:id, :remoteId, :informationId, :imageUri, :videoUri, :videoDuration, now(), 'InstagramStory')"""
# Used to skip stories that were already imported.
SELECT_QUERY_INFORMATION = """
SELECT id FROM Information WHERE remoteId = :remoteId AND importedFrom = 'InstagramStory'"""


# Seconds to wait for the media server before giving up on a download.
# Without a timeout a stalled connection would hang the cron job indefinitely.
_DOWNLOAD_TIMEOUT_SECONDS = 60


def _download_to_cdn(url, filename):
    """Download *url* into CDN_DIRECTORY as *filename* and return its stored URI.

    Raises:
        requests.RequestException: on network errors, timeouts or a non-2xx
            response.
        OSError: if the file cannot be written.
    """
    response = requests.get(url, timeout=_DOWNLOAD_TIMEOUT_SECONDS)
    # Without this check an HTTP error body would be saved as if it were media.
    response.raise_for_status()
    with open(os.path.join(CDN_DIRECTORY, filename), "wb") as f:
        f.write(response.content)
    return f"{RELATIVE_VIDEO_BASE_URI}{filename}"


async def instagram():
    """Import the current stories of every account in ``user_dict``.

    For each story that is not yet in the database, the thumbnail is
    downloaded and an Information row is inserted. If the story has a video,
    the video is downloaded as well and an InformationResource row is
    inserted. A failed download is logged and that step is skipped; it does
    not abort the run. The database connection is always closed at the end.
    """
    console.log("Connecting to database...", style="bold green")
    db = Database(os.getenv("DATABASE_URL"))
    await db.connect()
    try:
        for user, user_id in user_dict.items():
            console.log(f"Fetching stories for {user}")
            stories = cl.user_stories(user_id)
            # Random pause after each API call.
            time.sleep(random.randint(10, 30))
            console.log(f"Found {len(stories)} stories for {user}")

            for story in stories:
                remote_id = f"{user}_{story.id}"
                console.log(f"Processing story item {remote_id}")
                if await db.fetch_one(
                    SELECT_QUERY_INFORMATION, {"remoteId": remote_id}
                ):
                    console.log(
                        f"Media item {remote_id} already in database",
                        style="bold red",
                    )
                    continue
                console.log(f"Media item {remote_id} not in database, inserting")

                if not story.thumbnail_url:
                    console.log(
                        f"Media item {remote_id} has no thumbnail, skipping",
                        style="bold red",
                    )
                    continue

                console.log(f"Downloading thumbnail for {remote_id}")
                try:
                    thumbnail_url = _download_to_cdn(
                        story.thumbnail_url, f"{uuid4()}.jpg"
                    )
                except Exception as e:
                    # Per-story boundary: log and move on to the next story.
                    console.log(f"Error downloading thumbnail: {e}", style="bold red")
                    continue

                media_db_id = uuid4()
                await db.execute(
                    INSERT_QUERY_INFORMATION,
                    {
                        "id": media_db_id,
                        "remoteId": remote_id,
                        "text": user,
                        "additionalInfo": user,
                        "imageUri": thumbnail_url,
                        "href": f"https://www.instagram.com/{story.user.username}",
                        "date": story.taken_at.strftime("%Y-%m-%d %H:%M:%S"),
                    },
                )

                if not story.video_url:
                    continue

                time.sleep(random.randint(10, 30))

                console.log(f"Downloading video for story resource {story.pk}")
                try:
                    video_url = _download_to_cdn(story.video_url, f"r_{uuid4()}.mp4")
                except Exception as e:
                    # The Information row inserted above is kept, so the story
                    # still exists with its thumbnail even without the video.
                    console.log(f"Error downloading video: {e}", style="bold red")
                    continue

                await db.execute(
                    INSERT_QUERY_RESOURCE,
                    {
                        "id": uuid4(),
                        "remoteId": str(story.pk),
                        "informationId": media_db_id,
                        "imageUri": thumbnail_url,
                        "videoUri": video_url,
                        "videoDuration": story.video_duration,
                    },
                )
    finally:
        # Release the connection even if an import step raised.
        await db.disconnect()


asyncio.run(instagram())
55 changes: 55 additions & 0 deletions src/dataimport/instagramstorydelete.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import asyncio
import os
from datetime import datetime, timedelta

from databases import Database
from rich.console import Console


console = Console()

# Directory holding the downloaded story media. This script never creates it:
# if it is missing, the script logs that and exits.
CDN_DIRECTORY = "/app/cdn/instagram/"
# Prepended to a stored imageUri/videoUri to obtain the path of the local file.
RELATIVE_CDN_DIRECTORY = "/app"
if not os.path.exists(CDN_DIRECTORY):
    console.log(f"{CDN_DIRECTORY} does not exist, exiting", style="bold red")
    exit()

# Cut-off computed once at start-up: stories dated before it are deleted.
one_day_ago = datetime.now() - timedelta(days=1)

DELETE_QUERY_INFORMATION = "DELETE FROM Information WHERE id = :id"
# LEFT JOIN: stories without an InformationResource row are returned too,
# with videoUri being NULL for them.
SELECT_QUERY_INFORMATION = (
    "SELECT Information.id, Information.imageUri, InformationResource.videoUri "
    "FROM Information "
    "LEFT JOIN InformationResource ON Information.id = InformationResource.informationId "
    "WHERE Information.importedFrom = 'InstagramStory' "
    "AND Information.date < :one_day_ago"
)


def _remove_cdn_file(relative_uri):
    """Delete the local file behind a stored URI, if there is one.

    A missing URI (None/empty, e.g. the NULL videoUri of an image-only story)
    and an already-missing file are both treated as "nothing left to delete".
    Other OS errors propagate to the caller.
    """
    if not relative_uri:
        return
    path = f"{RELATIVE_CDN_DIRECTORY}{relative_uri}"
    try:
        os.remove(path)
    except FileNotFoundError:
        console.log(f"Local file {path} not found", style="yellow")


async def instagram():
    """Delete expired Instagram stories from disk and from the database.

    Selects every 'InstagramStory' Information row dated before
    ``one_day_ago``, removes its local image and video files and then deletes
    the row. Files that are absent do not block the database delete;
    previously an image-only story (NULL videoUri) or a missing file raised
    FileNotFoundError and the row was skipped on every run, so it was never
    deleted. Any other error while removing files is logged and that story
    is skipped. The database connection is always closed at the end.
    """
    console.log("Connecting to database...", style="bold green")
    db = Database(os.getenv("DATABASE_URL"))
    await db.connect()
    try:
        console.log("Fetching stories for deletion")
        stories = await db.fetch_all(
            SELECT_QUERY_INFORMATION, {"one_day_ago": one_day_ago}
        )
        console.log(f"Found {len(stories)} stories to delete")

        for story in stories:
            console.log(f"Try deleting local files for story {story.id}")
            try:
                _remove_cdn_file(story.imageUri)
                _remove_cdn_file(story.videoUri)
            except Exception as e:
                # Keep the row so the files can be retried on the next run.
                console.log(
                    f"Error deleting local files for story {story.id}", style="red"
                )
                console.log(e)
                continue
            console.log(f"Deleted local files for story {story.id}")

            console.log(f"Deleting story {story.id} from database", style="red")
            await db.execute(DELETE_QUERY_INFORMATION, {"id": story.id})
            console.log(f"Deleted story {story.id} from database")
    finally:
        # Release the connection even if a query raised.
        await db.disconnect()

    console.log("Done")


asyncio.run(instagram())
4 changes: 2 additions & 2 deletions src/psaggregator/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion src/psaggregator/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "psaggregator",
"version": "1.7.2",
"version": "1.8.0",
"scripts": {
"dev": "vite dev",
"build": "vite build",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
-- AlterTable
ALTER TABLE `ContentPiece` MODIFY `importedFrom` ENUM('Unknown', 'PietSmietDE', 'Instagram', 'InstagramStory', 'Twitter', 'Threads', 'Reddit', 'YouTube', 'OpenAI', 'Custom') NOT NULL;

-- AlterTable
ALTER TABLE `Information` MODIFY `importedFrom` ENUM('Unknown', 'PietSmietDE', 'Instagram', 'InstagramStory', 'Twitter', 'Threads', 'Reddit', 'YouTube', 'OpenAI', 'Custom') NOT NULL;

-- AlterTable
ALTER TABLE `InformationResource` MODIFY `importedFrom` ENUM('Unknown', 'PietSmietDE', 'Instagram', 'InstagramStory', 'Twitter', 'Threads', 'Reddit', 'YouTube', 'OpenAI', 'Custom') NOT NULL;

-- AlterTable
ALTER TABLE `ScheduledContentPiece` MODIFY `importedFrom` ENUM('Unknown', 'PietSmietDE', 'Instagram', 'InstagramStory', 'Twitter', 'Threads', 'Reddit', 'YouTube', 'OpenAI', 'Custom') NOT NULL;
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
-- AlterTable
ALTER TABLE `InformationResource` ADD COLUMN `videoDuration` INTEGER NULL;
2 changes: 2 additions & 0 deletions src/psaggregator/src/config/schema.prisma
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ enum ImportType {
Unknown
PietSmietDE
Instagram
InstagramStory
Twitter
Threads
Reddit
Expand Down Expand Up @@ -78,6 +79,7 @@ model InformationResource {
informationId String
imageUri String? @db.VarChar(1024)
videoUri String? @db.VarChar(1024)
videoDuration Int?
importedAt DateTime
importedFrom ImportType
}
Expand Down
8 changes: 3 additions & 5 deletions src/psaggregator/src/lib/components/Changelog.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,11 @@
>Ein einzelnes Video konnte aufgrund eines seltenen Fehlers letzte Woche nicht importiert werden. Diesen Fehler habe ich
behoben.</span>
</div>
<h2 class="my-4 text-2xl font-bold">Neue Features</h2>
<div>
<span>Instagram-Import</span>
<span
>Der Instagram-Import hat häufig nicht richtig funktioniert. Ich habe die Anzahl der Importe pro Tag drastisch reduziert, um
dies zu verhindern. Es kann also stellenweise ein paar Stunden dauern, bis Instagram-Posts hier auftauchen.</span>
<span>Instagram Stories!</span>
<span>Auf der News-Seite gibt es nun eine Übersicht über die Instagram Stories der ersten Reihe.</span>
</div>
<h2 class="my-4 text-2xl font-bold">Neue Features</h2>
<div>
<span>Übersicht News-Seite</span>
<span>Auf mobilen Endgeräten hat die News-Seite nun ein Tabsystem bekommen, damit man nicht mehr so weit scrollen muss.</span>
Expand Down
Loading

0 comments on commit 7378f38

Please sign in to comment.