Skip to content

Commit

Permalink
added twitter dataimport, fixes #3
Browse files Browse the repository at this point in the history
  • Loading branch information
zaanposni committed Sep 23, 2024
1 parent ae22d0e commit d602c9a
Show file tree
Hide file tree
Showing 21 changed files with 628 additions and 227 deletions.
10 changes: 7 additions & 3 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ KOFI_USERNAME=zaanposni

OPENAI_API_KEY=

INSTAGRAM_USERNAME =
INSTAGRAM_PASSWORD =
INSTAGRAM_2FA_SECRET =
INSTAGRAM_USERNAME=
INSTAGRAM_PASSWORD=
INSTAGRAM_2FA_SECRET=

TWITTER_USERNAME=
TWITTER_PASSWORD=
TWITTER_LIST_ID=
3 changes: 3 additions & 0 deletions docker-compose-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,9 @@ services:
- INSTAGRAM_PASSWORD=${INSTAGRAM_PASSWORD}
- INSTAGRAM_2FA_SECRET=${INSTAGRAM_2FA_SECRET}
- INSTAGRAM_CONFIG_PATH=/app/config/instagram.json
- TWITTER_USERNAME=${TWITTER_USERNAME}
- TWITTER_PASSWORD=${TWITTER_PASSWORD}
- TWITTER_LIST_ID=${TWITTER_LIST_ID}
volumes:
- shared-data:/app/cdn
- config:/app/config
Expand Down
3 changes: 3 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@ services:
- INSTAGRAM_PASSWORD=${INSTAGRAM_PASSWORD}
- INSTAGRAM_2FA_SECRET=${INSTAGRAM_2FA_SECRET}
- INSTAGRAM_CONFIG_PATH=/app/config/instagram.json
- TWITTER_USERNAME=${TWITTER_USERNAME}
- TWITTER_PASSWORD=${TWITTER_PASSWORD}
- TWITTER_LIST_ID=${TWITTER_LIST_ID}
volumes:
- shared-data:/app/cdn
- config:/app/config
Expand Down
10 changes: 7 additions & 3 deletions src/dataimport/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ TWITCH_CLIENT_SECRET=

OPENAI_API_KEY=

INSTAGRAM_USERNAME =
INSTAGRAM_PASSWORD =
INSTAGRAM_2FA_SECRET =
INSTAGRAM_USERNAME=
INSTAGRAM_PASSWORD=
INSTAGRAM_2FA_SECRET=

TWITTER_USERNAME=
TWITTER_PASSWORD=
TWITTER_LIST_ID=
3 changes: 3 additions & 0 deletions src/dataimport/hello-cron
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@
*/15 * * * * . /root/project_env.sh; /usr/local/bin/python /app/reddit.py >> /var/log/cron.log 2>&1
*/1 * * * * . /root/project_env.sh; /usr/local/bin/python /app/twitch.py >> /var/log/cron.log 2>&1
# 10 1 * * * . /root/project_env.sh; /usr/local/bin/python /app/informationopenaianalyze.py >> /var/log/cron.log 2>&1
41 01 * * * . /root/project_env.sh; /usr/local/bin/python /app/twitter.py >> /var/log/cron.log 2>&1
47 10 * * * . /root/project_env.sh; /usr/local/bin/python /app/twitter.py >> /var/log/cron.log 2>&1
33 16 * * * . /root/project_env.sh; /usr/local/bin/python /app/twitter.py >> /var/log/cron.log 2>&1
2 * * * * . /root/project_env.sh; /usr/local/bin/python /app/pietsmietdeuploadplan.py >> /var/log/cron.log 2>&1
5 * * * * . /root/project_env.sh; /usr/local/bin/python /app/pietsmietdeuploadplan.py >> /var/log/cron.log 2>&1
30 * * * * . /root/project_env.sh; /usr/local/bin/python /app/pietsmietdeuploadplan.py >> /var/log/cron.log 2>&1
Expand Down
4 changes: 4 additions & 0 deletions src/dataimport/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,7 @@ pyotp==2.9.0
instagrapi==2.0.1
pillow==10.3.0
blinker==1.7.0
tweety-ns==1.1.9
httpx==0.27.2
h2==4.1.0
httpx[http2]
161 changes: 161 additions & 0 deletions src/dataimport/twitter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
import asyncio
import os
import time
import random
from databases import Database
from uuid import uuid4

import requests
from rich.console import Console
from tweety import Twitter
from tweety.types import Tweet, SelfThread, ConversationThread


# Parameterised SQL statements used by handle_tweet(). Every imported row is
# tagged with importedFrom = 'Twitter'.
INSERT_QUERY_INFORMATION = """
INSERT INTO Information (id, remoteId, text, additionalInfo, imageUri, href, date, analyzedAt, importedAt, importedFrom)
VALUES (:id, :remoteId, :text, :additionalInfo, NULL, :href, :date, NULL, now(), 'Twitter')"""
INSERT_QUERY_RESOURCE = """
INSERT INTO InformationResource (id, remoteId, informationId, imageUri, videoUri, importedAt, importedFrom)
VALUES (:id, :remoteId, :informationId, :imageUri, :videoUri, now(), 'Twitter')"""
SELECT_QUERY_INFORMATION = """
SELECT id FROM Information WHERE remoteId = :remoteId AND importedFrom = 'Twitter'"""

# Twitter author id -> short name written to Information.additionalInfo.
# Tweets from authors that are not listed here are skipped by handle_tweet().
USER_DICT = {
    120150508: "jay",
    394250799: "brammen",
    832560607: "sep",
    400567148: "chris",
    109850283: "peter",
}

console = Console()
app = Twitter("session")

# Downloaded thumbnails and videos are written to this directory.
if not os.path.exists("/app/cdn/twitter"):
    console.log("Creating /app/cdn/twitter directory...", style="bold green")
# exist_ok=True: do not crash if the directory appears between the check above
# and the creation (e.g. another import run starting at the same time).
os.makedirs("/app/cdn/twitter", exist_ok=True)

USERNAME = os.getenv("TWITTER_USERNAME")
PASSWORD = os.getenv("TWITTER_PASSWORD")
LIST_ID = os.getenv("TWITTER_LIST_ID")

# Fail fast on missing configuration, before any login attempt is made.
if not USERNAME or not PASSWORD:
    raise Exception("No Twitter username or password provided")

if not LIST_ID:
    raise Exception("No Twitter list ID provided")

app.start(USERNAME, PASSWORD)


async def handle_tweet(tweet: Tweet, db: Database) -> None:
    """Import one tweet and its media into the database.

    The tweet is skipped when its author is not listed in ``USER_DICT``, when
    it is a retweet, or when a row with the same ``remoteId`` already exists.
    Otherwise it is inserted into ``Information``. Each attached media item is
    then downloaded to ``/app/cdn/twitter`` and recorded in
    ``InformationResource``. A media item whose download fails is logged and
    skipped; the tweet row itself is kept.

    Args:
        tweet: The tweet to import.
        db: Connected database handle used for the lookup and the inserts.
    """
    # NOTE(review): USER_DICT keys are ints, so this lookup only matches if
    # tweety exposes author ids as ints too -- confirm against the library.
    if tweet.author.id not in USER_DICT:
        console.log(
            f"Tweet author {tweet.author.id} not in user dict. skipping",
            style="bold red",
        )
        return

    if tweet.is_retweet:
        console.log(f"Tweet {tweet.id} is a retweet, skipping", style="bold red")
        return

    remote_id = f"{tweet.author.id}_{tweet.id}"
    media_db_id = uuid4()
    console.log(f"Processing tweet {remote_id}")

    if await db.fetch_one(SELECT_QUERY_INFORMATION, {"remoteId": remote_id}):
        console.log(f"Tweet {remote_id} already in database", style="bold red")
        return

    console.log(f"Tweet {remote_id} not in database, inserting")

    await db.execute(
        INSERT_QUERY_INFORMATION,
        {
            "id": media_db_id,
            "remoteId": remote_id,
            "text": tweet.text,
            "additionalInfo": USER_DICT[tweet.author.id],
            "href": tweet.url,
            "date": tweet.created_on.strftime("%Y-%m-%d %H:%M:%S"),
        },
    )

    # Like roughly every second imported tweet; a failed like is only logged
    # and never aborts the import.
    if random.randint(0, 100) < 50:
        try:
            console.log(f"Liking tweet {remote_id}")
            app.like_tweet(tweet.id)
        except Exception as e:
            console.log(f"Error liking tweet: {e}", style="bold red")

    if not tweet.media:
        return

    for media in tweet.media:
        console.log(f"Downloading media {media.id} for tweet {remote_id}")
        # Random pause before each download (presumably to avoid hammering
        # Twitter). Awaited so the event loop is not blocked while waiting.
        await asyncio.sleep(random.randint(10, 30))

        thumbnail_url = media.media_url_https
        if thumbnail_url:
            console.log(f"Donwloading thumbnail for media {media.id}")
            try:
                # The timeout keeps a hung connection from stalling the cron
                # job; raise_for_status() prevents an HTTP error page from
                # being stored on disk as if it were the image.
                response = requests.get(thumbnail_url, timeout=30)
                response.raise_for_status()
                filename = f"r_{uuid4()}.jpg"
                with open(f"/app/cdn/twitter/{filename}", "wb") as f:
                    f.write(response.content)
                # From here on the resource points at the local CDN copy.
                thumbnail_url = f"/cdn/twitter/{filename}"
            except Exception as e:
                # Best effort: skip this media item, keep importing the rest.
                console.log(f"Error downloading thumbnail: {e}", style="bold red")
                continue

        video_url = None
        if media.type == "video":
            console.log(f"Media {media.id} is a video. Downloading...")
            try:
                filename = f"r_{uuid4()}.mp4"
                media.best_stream().download(f"/app/cdn/twitter/{filename}")
                video_url = f"/cdn/twitter/{filename}"
            except Exception as e:
                # Best effort: skip this media item, keep importing the rest.
                console.log(f"Error downloading video: {e}", style="bold red")
                continue

        await db.execute(
            INSERT_QUERY_RESOURCE,
            {
                "id": media.id,
                "remoteId": f"{remote_id}_{media.id}",
                "informationId": media_db_id,
                "imageUri": thumbnail_url,
                "videoUri": video_url,
            },
        )


async def twitter() -> None:
    """Fetch the tweets of the configured Twitter list and import them.

    Connects to the database given by the ``DATABASE_URL`` environment
    variable, loads the tweets of list ``LIST_ID`` and passes every tweet to
    ``handle_tweet`` -- both standalone tweets and the tweets contained in
    self threads and conversation threads. The database connection is closed
    again even if the import fails part-way.

    Raises:
        Exception: If ``DATABASE_URL`` is not set.
    """
    database_url = os.getenv("DATABASE_URL")
    if not database_url:
        # Same fail-fast style as the Twitter credential checks in this file.
        raise Exception("No database URL provided")

    console.log("Connecting to database...", style="bold green")
    db = Database(url=database_url)
    await db.connect()

    try:
        console.log("Fetching last tweets of list...")

        contents = app.get_list_tweets(LIST_ID)
        for content in contents:
            # The checks are kept independent (not elif) so an object matching
            # more than one type is handled exactly as before.
            if isinstance(content, Tweet):
                await handle_tweet(content, db)
            if isinstance(content, SelfThread):
                print("=== Detected a self thread")
                for t in content.tweets:
                    if isinstance(t, Tweet):
                        await handle_tweet(t, db)
                print("=== End of self thread")
            if isinstance(content, ConversationThread):
                print("=== Detected a conversation thread")
                for t in content.threads:
                    if isinstance(t, Tweet):
                        await handle_tweet(t, db)
                print("=== End of conversation thread")
    finally:
        # Always release the connection, also when fetching or importing fails.
        await db.disconnect()

    console.log("Done")


# Script entry point: cron runs this file directly (python /app/twitter.py).
asyncio.run(twitter())
10 changes: 5 additions & 5 deletions src/psaggregator/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion src/psaggregator/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "psaggregator",
"version": "1.11.1",
"version": "1.12.0",
"scripts": {
"dev": "vite dev",
"build": "vite build",
Expand Down
42 changes: 23 additions & 19 deletions src/psaggregator/src/lib/components/NewsBig.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -6,24 +6,29 @@
import { LogoYoutube, LogoTwitter, LogoInstagram, FaceDissatisfied } from "carbon-icons-svelte";
import { browser } from "$app/environment";
import { ImportType, type Information, type InformationResource } from "@prisma/client";
import TwitterPost from "./TwitterPost.svelte";
export let youtubeCommunityPosts: Array<Information & { InformationResource: InformationResource[] }>;
export let instagramPosts: Array<Information & { InformationResource: InformationResource[] }>;
export let twitterPosts: Array<Information & { InformationResource: InformationResource[] }>;
const batchSize = 20;
let skip = 0;
let loading = {
[ImportType.YouTube]: false,
[ImportType.Instagram]: false
[ImportType.Instagram]: false,
[ImportType.Twitter]: false
};
let endReached = {
[ImportType.YouTube]: youtubeCommunityPosts.length < batchSize,
[ImportType.Instagram]: instagramPosts.length < batchSize
[ImportType.YouTube]: youtubeCommunityPosts.length % batchSize !== 0,
[ImportType.Instagram]: instagramPosts.length % batchSize !== 0,
[ImportType.Twitter]: twitterPosts.length % batchSize !== 0
};
async function loadMore() {
for (const type of [ImportType.YouTube, ImportType.Instagram]) {
for (const type of [ImportType.YouTube, ImportType.Instagram, ImportType.Twitter]) {
if (loading[type] || endReached[type]) {
continue;
}
Expand All @@ -38,6 +43,8 @@
youtubeCommunityPosts = [...youtubeCommunityPosts, ...newInformation];
} else if (type === ImportType.Instagram) {
instagramPosts = [...instagramPosts, ...newInformation];
} else if (type === ImportType.Twitter) {
twitterPosts = [...twitterPosts, ...newInformation];
}
loading[type] = false;
Expand Down Expand Up @@ -112,31 +119,28 @@
</div>
<div>
<div class="mb-2 ml-2 flex items-center text-2xl">
<img alt="threads" src="/threads-logo.svg" class="mr-2 inline-block h-8 w-8" />
Threads
<LogoTwitter size={32} class="mr-2" />
Twitter
</div>
<div class="mx-auto flex flex-col items-center text-center">
<div>
<FaceDissatisfied size={32} />
</div>
<span>Leider gibt es noch keinen Threads-Import.</span>
<span>Dieses Projekt ist Open Source.</span>
<span
>Beteilige dich gerne auf
<a href={GITHUB_URL} class="underline" target="_blank">GitHub</a>
</span>
<div class="flex flex-col gap-y-4">
{#each twitterPosts as twitter}
<TwitterPost post={twitter} />
{/each}
</div>
{#if loading[ImportType.Twitter]}
<div class="col-span-full mt-4 flex w-full items-center justify-center text-center md:mt-8">loading...</div>
{/if}
</div>
<div>
<div class="mb-2 ml-2 flex items-center text-2xl">
<LogoTwitter size={32} class="mr-2" />
Twitter
<img alt="threads" src="/threads-logo.svg" class="mr-2 inline-block h-8 w-8" />
Threads
</div>
<div class="mx-auto flex flex-col items-center text-center">
<div>
<FaceDissatisfied size={32} />
</div>
<span>Leider gibt es noch keinen Twitter-Import.</span>
<span>Leider gibt es noch keinen Threads-Import.</span>
<span>Dieses Projekt ist Open Source.</span>
<span
>Beteilige dich gerne auf
Expand Down
Loading

0 comments on commit d602c9a

Please sign in to comment.