Skip to content

Commit

Permalink
Merge pull request #22 from zaanposni/4-add-instagram-information-import
Browse files Browse the repository at this point in the history
4 add instagram information import
  • Loading branch information
zaanposni authored Jan 26, 2024
2 parents e1ef912 + e0e8048 commit a0fa5d2
Show file tree
Hide file tree
Showing 24 changed files with 618 additions and 166 deletions.
6 changes: 5 additions & 1 deletion .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ MYSQL_DATABASE=psaggregator
MYSQL_ROOT_PASSWORD=psaggregator

LEGAL_URL=
PUBLIC_KOFI_USERNAME=zaanposni
KOFI_USERNAME=zaanposni

OPENAI_API_KEY=

INSTAGRAM_USERNAME=
INSTAGRAM_PASSWORD=
INSTAGRAM_2FA_SECRET=
6 changes: 6 additions & 0 deletions docker-compose-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,13 @@ services:
- OPENAI_API_KEY=${OPENAI_API_KEY}
- SQLALCHEMY_SILENCE_UBER_WARNING=1
- YT_SERVER_BASE_URL=http://youtube-api
- INSTAGRAM_USERNAME=${INSTAGRAM_USERNAME}
- INSTAGRAM_PASSWORD=${INSTAGRAM_PASSWORD}
- INSTAGRAM_2FA_SECRET=${INSTAGRAM_2FA_SECRET}
- INSTAGRAM_CONFIG_PATH=/app/config/instagram.json
volumes:
- shared-data:/app/cdn
- config:/app/config
depends_on:
- db
- frontend
Expand Down Expand Up @@ -90,6 +95,7 @@ networks:

volumes:
mysql:
config:
shared-data:
driver: local
driver_opts:
Expand Down
6 changes: 6 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,13 @@ services:
- OPENAI_API_KEY=${OPENAI_API_KEY}
- SQLALCHEMY_SILENCE_UBER_WARNING=1
- YT_SERVER_BASE_URL=http://youtube-api
- INSTAGRAM_USERNAME=${INSTAGRAM_USERNAME}
- INSTAGRAM_PASSWORD=${INSTAGRAM_PASSWORD}
- INSTAGRAM_2FA_SECRET=${INSTAGRAM_2FA_SECRET}
- INSTAGRAM_CONFIG_PATH=/app/config/instagram.json
volumes:
- shared-data:/app/cdn
- config:/app/config
depends_on:
- db
- frontend
Expand Down Expand Up @@ -82,6 +87,7 @@ networks:

volumes:
mysql:
config:
shared-data:
driver: local
driver_opts:
Expand Down
9 changes: 9 additions & 0 deletions readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,15 @@

PietSmiet Aggregator is a self-hostable web application that aggregates all the videos, streams and additional content from PietSmiet and displays them in a nice overview.

**Dashboard** - Display all PietSmiet videos and streams in a nice overview.\
**API** - Free and Public JSON HTTP API.\
**PietSmietDE Import** - Import all videos and news from the PietSmiet website.\
**YouTube Import** - Import all community posts from the PietSmiet YouTube channel.\
**Reddit Import** - Import trending posts from r/pietsmiet.\
**Instagram Import** - Import all posts.\
**OpenAI Streamingplan Analysis** - Analyze the Streamingplan with OpenAI Vision and import scheduled streams into the database.\
**Full Control** - Self-hostable, Open Source and Dockerized.

## ⚡️ Quick Start Self Deployment

First, [download][docker_download_url] and install **Docker**.
Expand Down
4 changes: 4 additions & 0 deletions src/dataimport/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,7 @@ TWITCH_CLIENT_ID=
TWITCH_CLIENT_SECRET=

OPENAI_API_KEY=

INSTAGRAM_USERNAME=
INSTAGRAM_PASSWORD=
INSTAGRAM_2FA_SECRET=
2 changes: 1 addition & 1 deletion src/dataimport/.gitignore
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
geckodriver.exe
*.json
instagram.py
openai_test.py
.env
videos.sql
__pycache__/
threads.py
1 change: 1 addition & 0 deletions src/dataimport/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ COPY . /app

# Install any needed packages specified in requirements.txt
RUN pip install --no-cache-dir -r requirements.txt
RUN pip install --no-cache-dir --no-deps --force-reinstall pydantic==1.10.13

RUN touch /var/log/cron.log
COPY hello-cron /etc/cron.d/hello-cron
Expand Down
3 changes: 2 additions & 1 deletion src/dataimport/hello-cron
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@
35 * * * * . /root/project_env.sh; /usr/local/bin/python /app/pietsmietdevideoimporter.py >> /var/log/cron.log 2>&1
*/15 * * * * . /root/project_env.sh; /usr/local/bin/python /app/pietsmietdevideoimporter.py >> /var/log/cron.log 2>&1
5 * * * * . /root/project_env.sh; /usr/local/bin/python /app/youtube.py >> /var/log/cron.log 2>&1
5 * * * * . /root/project_env.sh; /usr/local/bin/python /app/instagram.py >> /var/log/cron.log 2>&1
*/15 * * * * . /root/project_env.sh; /usr/local/bin/python /app/reddit.py >> /var/log/cron.log 2>&1
*/1 * * * * . /root/project_env.sh; /usr/local/bin/python /app/twitch.py >> /var/log/cron.log 2>&1
1 1 * * * . /root/project_env.sh; /usr/local/bin/python /app/informationopenaianalyze.py >> /var/log/cron.log 2>&1
10 1 * * * . /root/project_env.sh; /usr/local/bin/python /app/informationopenaianalyze.py >> /var/log/cron.log 2>&1
2 * * * * . /root/project_env.sh; /usr/local/bin/python /app/pietsmietdeuploadplan.py >> /var/log/cron.log 2>&1
5 * * * * . /root/project_env.sh; /usr/local/bin/python /app/pietsmietdeuploadplan.py >> /var/log/cron.log 2>&1
30 * * * * . /root/project_env.sh; /usr/local/bin/python /app/pietsmietdeuploadplan.py >> /var/log/cron.log 2>&1
Expand Down
207 changes: 207 additions & 0 deletions src/dataimport/instagram.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,207 @@
import asyncio
import os
import requests
from databases import Database
from uuid import uuid4

from rich.console import Console
from instagrapi import Client
from instagrapi.exceptions import LoginRequired
import pyotp


# Shared rich console used for every log line this script emits.
console = Console()

# Credentials and the session-file location are read from the environment.
USERNAME = os.getenv("INSTAGRAM_USERNAME")
PASSWORD = os.getenv("INSTAGRAM_PASSWORD")
KEY_2FA = os.getenv("INSTAGRAM_2FA_SECRET")
# An unset or empty INSTAGRAM_CONFIG_PATH falls back to a local session file.
CONFIG_PATH = os.getenv("INSTAGRAM_CONFIG_PATH") or "session.json"
console.log(f"Using config path {CONFIG_PATH}")

# Username and password are mandatory; abort before any login attempt
# if either one is missing or empty.
if not (USERNAME and PASSWORD):
    raise Exception("No Instagram username or password provided")


def _current_2fa_code():
    """
    Return the verification code to send along with a login attempt.

    Returns:
        The current TOTP code derived from ``KEY_2FA`` (spaces in the secret
        are removed first), or an empty string when no secret is configured.
    """
    if not KEY_2FA:
        return ""
    return pyotp.TOTP(KEY_2FA.replace(" ", "")).now()


def login_user():
    """
    Log in to Instagram and return the authenticated client.

    The settings stored at ``CONFIG_PATH`` are tried first. If that attempt
    is skipped or fails, a plain username/password login is attempted. After
    a successful login the client settings are written back to
    ``CONFIG_PATH``.

    Returns:
        The logged-in instagrapi ``Client``.

    Raises:
        Exception: if neither the session login nor the password login
            succeeded.
    """
    console.log("Attempting to login user...")
    if not os.path.exists(CONFIG_PATH):
        console.log("No session file found, creating empty session file")
        # Seed an empty JSON object so load_settings() has a file to read.
        with open(CONFIG_PATH, "w", encoding="utf-8") as f:
            f.write("{}")

    cl = Client()
    session = cl.load_settings(CONFIG_PATH)

    login_via_session = False
    login_via_pw = False

    if session:
        try:
            cl.set_settings(session)
            cl.login(USERNAME, PASSWORD, verification_code=_current_2fa_code())

            # check if session is valid
            try:
                cl.get_timeline_feed()
            except LoginRequired:
                console.log(
                    "Session is invalid, need to login via username and password"
                )

                old_session = cl.get_settings()

                # use the same device uuids across logins
                cl.set_settings({})
                cl.set_uuids(old_session["uuids"])

                cl.login(USERNAME, PASSWORD, verification_code=_current_2fa_code())
            login_via_session = True
        except Exception as e:
            # Deliberately broad: any failure here just means we fall back to
            # the username/password login below.
            console.log(f"Couldn't login user using session information: {e}")

    if not login_via_session:
        try:
            console.log(
                f"Attempting to login via username and password. username: {USERNAME}"
            )

            if cl.login(USERNAME, PASSWORD, verification_code=_current_2fa_code()):
                login_via_pw = True
        except Exception as e:
            # Deliberately broad: the failure is reported via the final check.
            console.log(f"Couldn't login user using username and password: {e}")

    if not login_via_pw and not login_via_session:
        raise Exception("Couldn't login user with either password or session")

    # Persist the (possibly refreshed) settings for the next run.
    cl.dump_settings(CONFIG_PATH)
    return cl


# Log in once at module load; instagram() below uses this client to fetch media.
cl = login_user()
console.log("Successfully logged in user")

# Accounts to import: label -> id passed to cl.user_medias().
# The label is also used as the remoteId prefix and stored as additionalInfo.
user_dict = {
"peter": 344058897,
"brammen": 1588473759,
"jay": 2030403724,
"sep": 1609561808,
"chris": 1057433625,
}

console.log("Fetching last 50 media items for each user")

# SQL statements used by instagram(); values are bound via named :placeholders.
# Inserts one row per media item. analyzedAt starts as NULL and importedAt is
# filled in by the database via now().
INSERT_QUERY_INFORMATION = """
INSERT INTO Information (id, remoteId, text, additionalInfo, imageUri, href, date, analyzedAt, importedAt, importedFrom)
VALUES (:id, :remoteId, :text, :additionalInfo, :imageUri, :href, :date, NULL, now(), 'Instagram')"""
# Inserts one row per media resource, linked to its parent row via informationId.
INSERT_QUERY_RESOURCE = """
INSERT INTO InformationResource (id, remoteId, informationId, imageUri, videoUri, importedAt, importedFrom)
VALUES (:id, :remoteId, :informationId, :imageUri, :videoUri, now(), 'Instagram')"""
# Looks up an existing Instagram import by remoteId so it can be skipped.
SELECT_QUERY_INFORMATION = """
SELECT id FROM Information WHERE remoteId = :remoteId AND importedFrom = 'Instagram'"""


def _download_thumbnail(url, prefix, timeout=30):
    """
    Download an image and store it in the CDN directory.

    Args:
        url: Remote image URL to fetch.
        prefix: Filename prefix; the file is saved as ``{prefix}_{uuid4}.jpg``.
        timeout: Seconds to wait for the remote server before giving up, so a
            stalled download cannot hang the cron job.

    Returns:
        The ``/cdn/...`` path under which the stored file is referenced.

    Raises:
        requests.RequestException: on network errors, timeouts or HTTP error
            status codes.
        OSError: if the file cannot be written.
    """
    response = requests.get(url, timeout=timeout)
    # Fail on 4xx/5xx instead of saving an error page as a .jpg file.
    response.raise_for_status()
    filename = f"{prefix}_{uuid4()}.jpg"
    with open(f"/app/cdn/{filename}", "wb") as f:
        f.write(response.content)
    return f"/cdn/{filename}"


async def _import_media(db, user, media):
    """
    Insert one media item and its resources unless it was already imported.

    Args:
        db: Connected ``Database`` used for the lookup and the inserts.
        user: Label from ``user_dict``; used as remoteId prefix and stored as
            additionalInfo.
        media: Media object as returned by ``cl.user_medias()``.
    """
    remote_id = f"{user}_{media.id}"
    media_db_id = uuid4()
    console.log(f"Processing media item {remote_id}")
    if await db.fetch_one(SELECT_QUERY_INFORMATION, {"remoteId": remote_id}):
        console.log(f"Media item {remote_id} already in database", style="bold red")
        return
    console.log(f"Media item {remote_id} not in database, inserting")

    # Prefer the media's own thumbnail; otherwise take the first resource
    # that has one.
    thumbnail_url = media.thumbnail_url or next(
        (r.thumbnail_url for r in media.resources if r.thumbnail_url), None
    )
    if not thumbnail_url:
        console.log(
            f"Media item {remote_id} has no thumbnail, skipping",
            style="bold red",
        )
        return

    console.log(f"Downloading thumbnail for {remote_id}")
    try:
        thumbnail_url = _download_thumbnail(thumbnail_url, "instagram")
    except Exception as e:
        # Best effort: a failed download skips this item; since nothing was
        # inserted, a later run will pick it up again.
        console.log(f"Error downloading thumbnail: {e}", style="bold red")
        return

    await db.execute(
        INSERT_QUERY_INFORMATION,
        {
            "id": media_db_id,
            "remoteId": remote_id,
            "text": media.caption_text,
            "additionalInfo": user,
            "imageUri": thumbnail_url,
            "href": f"https://www.instagram.com/p/{media.code}",
            "date": media.taken_at.strftime("%Y-%m-%d %H:%M:%S"),
        },
    )

    for resource in media.resources:
        image_uri = resource.thumbnail_url
        if image_uri:
            console.log(f"Downloading thumbnail for resource {resource.pk}")
            try:
                image_uri = _download_thumbnail(image_uri, "instagramr")
            except Exception as e:
                # Best effort: skip only this resource, keep the others.
                console.log(f"Error downloading thumbnail: {e}", style="bold red")
                continue

        await db.execute(
            INSERT_QUERY_RESOURCE,
            {
                "id": uuid4(),
                "remoteId": str(resource.pk),
                "informationId": media_db_id,
                "imageUri": image_uri,
                "videoUri": resource.video_url,
            },
        )


async def instagram():
    """
    Import the latest Instagram posts of every account in ``user_dict``.

    For each account the last 50 media items are fetched through the
    module-level client ``cl``. Items not yet in the database get their
    thumbnail stored in the CDN directory and are inserted as an
    ``Information`` row, followed by one ``InformationResource`` row per
    media resource. The database connection is closed again when the import
    finishes or fails.
    """
    console.log("Connecting to database...", style="bold green")
    db = Database(url=os.getenv("DATABASE_URL"))
    await db.connect()
    try:
        for user, user_id in user_dict.items():
            console.log(f"Fetching last 50 media items for {user}")
            last_media = cl.user_medias(user_id, amount=50)
            console.log(f"Found {len(last_media)} media items for {user}")
            for media in last_media:
                await _import_media(db, user, media)
    finally:
        # Always release the connection, even if an import step raised.
        await db.disconnect()


asyncio.run(instagram())
3 changes: 3 additions & 0 deletions src/dataimport/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,6 @@ praw==7.7.1
rich==12.4.4
requests
openai==1.7.2
pyotp==2.9.0
instagrapi==2.0.1
pillow==10.2.0
2 changes: 1 addition & 1 deletion src/dataimport/youtube.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ async def youtube():
yt_data = requests.get(collection_url).json()["items"][0]["community"]

for yt in yt_data:
query = f"SELECT * FROM Information WHERE remoteId='{yt['id']}'"
query = f"SELECT * FROM Information WHERE remoteId='{yt['id']}' AND importedFrom='YouTube'"
result = await db.fetch_one(query=query)
if result:
console.log(f"{yt['id']} already in database", style="bold red")
Expand Down
Loading

0 comments on commit a0fa5d2

Please sign in to comment.