From d602c9aa2f614ab603f063b8f9c1f6493b6c70df Mon Sep 17 00:00:00 2001 From: zaanposni Date: Mon, 23 Sep 2024 12:43:13 +0200 Subject: [PATCH] added twitter dataimport, fixes #3 --- .env.example | 10 +- docker-compose-test.yml | 3 + docker-compose.yml | 3 + src/dataimport/.env.example | 10 +- src/dataimport/hello-cron | 3 + src/dataimport/requirements.txt | 4 + src/dataimport/twitter.py | 161 +++++++++++++ src/psaggregator/package-lock.json | 10 +- src/psaggregator/package.json | 2 +- .../src/lib/components/NewsBig.svelte | 42 ++-- .../src/lib/components/NewsSmall.svelte | 34 ++- .../src/lib/components/TwitterPost.svelte | 50 ++++ .../ui/alert/alert-description.svelte | 13 + .../components/ui/alert/alert-title.svelte | 21 ++ .../src/lib/components/ui/alert/alert.svelte | 17 ++ .../src/lib/components/ui/alert/index.ts | 33 +++ src/psaggregator/src/routes/+page.server.ts | 227 ++++++++++-------- src/psaggregator/src/routes/+page.svelte | 37 ++- src/psaggregator/src/routes/api/+page.svelte | 3 +- .../src/routes/news/+page.server.ts | 133 +++++----- src/psaggregator/src/routes/news/+page.svelte | 39 ++- 21 files changed, 628 insertions(+), 227 deletions(-) create mode 100644 src/dataimport/twitter.py create mode 100644 src/psaggregator/src/lib/components/TwitterPost.svelte create mode 100644 src/psaggregator/src/lib/components/ui/alert/alert-description.svelte create mode 100644 src/psaggregator/src/lib/components/ui/alert/alert-title.svelte create mode 100644 src/psaggregator/src/lib/components/ui/alert/alert.svelte create mode 100644 src/psaggregator/src/lib/components/ui/alert/index.ts diff --git a/.env.example b/.env.example index 0eff420..8947965 100644 --- a/.env.example +++ b/.env.example @@ -14,6 +14,10 @@ KOFI_USERNAME=zaanposni OPENAI_API_KEY= -INSTAGRAM_USERNAME = -INSTAGRAM_PASSWORD = -INSTAGRAM_2FA_SECRET = +INSTAGRAM_USERNAME= +INSTAGRAM_PASSWORD= +INSTAGRAM_2FA_SECRET= + +TWITTER_USERNAME= +TWITTER_PASSWORD= +TWITTER_LIST_ID= diff --git 
a/docker-compose-test.yml b/docker-compose-test.yml index 51e8dc4..b3a90d5 100644 --- a/docker-compose-test.yml +++ b/docker-compose-test.yml @@ -53,6 +53,9 @@ services: - INSTAGRAM_PASSWORD=${INSTAGRAM_PASSWORD} - INSTAGRAM_2FA_SECRET=${INSTAGRAM_2FA_SECRET} - INSTAGRAM_CONFIG_PATH=/app/config/instagram.json + - TWITTER_USERNAME=${TWITTER_USERNAME} + - TWITTER_PASSWORD=${TWITTER_PASSWORD} + - TWITTER_LIST_ID=${TWITTER_LIST_ID} volumes: - shared-data:/app/cdn - config:/app/config diff --git a/docker-compose.yml b/docker-compose.yml index d4c00ac..db897d6 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -47,6 +47,9 @@ services: - INSTAGRAM_PASSWORD=${INSTAGRAM_PASSWORD} - INSTAGRAM_2FA_SECRET=${INSTAGRAM_2FA_SECRET} - INSTAGRAM_CONFIG_PATH=/app/config/instagram.json + - TWITTER_USERNAME=${TWITTER_USERNAME} + - TWITTER_PASSWORD=${TWITTER_PASSWORD} + - TWITTER_LIST_ID=${TWITTER_LIST_ID} volumes: - shared-data:/app/cdn - config:/app/config diff --git a/src/dataimport/.env.example b/src/dataimport/.env.example index 73b19c9..fcbd779 100644 --- a/src/dataimport/.env.example +++ b/src/dataimport/.env.example @@ -8,6 +8,10 @@ TWITCH_CLIENT_SECRET= OPENAI_API_KEY= -INSTAGRAM_USERNAME = -INSTAGRAM_PASSWORD = -INSTAGRAM_2FA_SECRET = +INSTAGRAM_USERNAME= +INSTAGRAM_PASSWORD= +INSTAGRAM_2FA_SECRET= + +TWITTER_USERNAME= +TWITTER_PASSWORD= +TWITTER_LIST_ID= diff --git a/src/dataimport/hello-cron b/src/dataimport/hello-cron index e6f7060..0fae170 100644 --- a/src/dataimport/hello-cron +++ b/src/dataimport/hello-cron @@ -12,6 +12,9 @@ */15 * * * * . /root/project_env.sh; /usr/local/bin/python /app/reddit.py >> /var/log/cron.log 2>&1 */1 * * * * . /root/project_env.sh; /usr/local/bin/python /app/twitch.py >> /var/log/cron.log 2>&1 # 10 1 * * * . /root/project_env.sh; /usr/local/bin/python /app/informationopenaianalyze.py >> /var/log/cron.log 2>&1 +41 01 * * * . /root/project_env.sh; /usr/local/bin/python /app/twitter.py >> /var/log/cron.log 2>&1 +47 10 * * * . 
/root/project_env.sh; /usr/local/bin/python /app/twitter.py >> /var/log/cron.log 2>&1
+33 16 * * * . /root/project_env.sh; /usr/local/bin/python /app/twitter.py >> /var/log/cron.log 2>&1
 2 * * * * . /root/project_env.sh; /usr/local/bin/python /app/pietsmietdeuploadplan.py >> /var/log/cron.log 2>&1
 5 * * * * . /root/project_env.sh; /usr/local/bin/python /app/pietsmietdeuploadplan.py >> /var/log/cron.log 2>&1
 30 * * * * . /root/project_env.sh; /usr/local/bin/python /app/pietsmietdeuploadplan.py >> /var/log/cron.log 2>&1
diff --git a/src/dataimport/requirements.txt b/src/dataimport/requirements.txt
index 1a9eac5..a4a9a2c 100644
--- a/src/dataimport/requirements.txt
+++ b/src/dataimport/requirements.txt
@@ -13,3 +13,7 @@ pyotp==2.9.0
 instagrapi==2.0.1
 pillow==10.3.0
 blinker==1.7.0
+tweety-ns==1.1.9
+httpx==0.27.2
+h2==4.1.0
+httpx[http2]
diff --git a/src/dataimport/twitter.py b/src/dataimport/twitter.py
new file mode 100644
index 0000000..4ba9b90
--- /dev/null
+++ b/src/dataimport/twitter.py
@@ -0,0 +1,190 @@
+"""Import tweets of a Twitter list into the Information tables.
+
+Reads TWITTER_USERNAME, TWITTER_PASSWORD, TWITTER_LIST_ID and DATABASE_URL
+from the environment and stores downloaded media below /app/cdn/twitter.
+"""
+
+import asyncio
+import os
+import time
+import random
+from databases import Database
+from uuid import uuid4
+
+import requests
+from rich.console import Console
+from tweety import Twitter
+from tweety.types import Tweet, SelfThread, ConversationThread
+
+
+INSERT_QUERY_INFORMATION = """
+    INSERT INTO Information (id, remoteId, text, additionalInfo, imageUri, href, date, analyzedAt, importedAt, importedFrom)
+    VALUES (:id, :remoteId, :text, :additionalInfo, NULL, :href, :date, NULL, now(), 'Twitter')"""
+INSERT_QUERY_RESOURCE = """
+    INSERT INTO InformationResource (id, remoteId, informationId, imageUri, videoUri, importedAt, importedFrom)
+    VALUES (:id, :remoteId, :informationId, :imageUri, :videoUri, now(), 'Twitter')"""
+SELECT_QUERY_INFORMATION = """
+    SELECT id FROM Information WHERE remoteId = :remoteId AND importedFrom = 'Twitter'"""
+
+# Twitter author id -> short name stored in Information.additionalInfo.
+# Tweets from authors that are not listed here are skipped.
+USER_DICT = {
+    120150508: "jay",
+    394250799: "brammen",
+    832560607: "sep",
+    400567148: "chris",
+    109850283: "peter",
+}
+
+# Seconds to wait for a thumbnail download before giving up.
+REQUEST_TIMEOUT = 30
+
+console = Console()
+app = Twitter("session")
+
+# create cdn directory if not exists
+if not os.path.exists("/app/cdn/twitter"):
+    console.log("Creating /app/cdn/twitter directory...", style="bold green")
+    os.makedirs("/app/cdn/twitter")
+
+USERNAME = os.getenv("TWITTER_USERNAME")
+PASSWORD = os.getenv("TWITTER_PASSWORD")
+LIST_ID = os.getenv("TWITTER_LIST_ID")
+
+if not USERNAME or not PASSWORD:
+    raise Exception("No Twitter username or password provided")
+
+if not LIST_ID:
+    raise Exception("No Twitter list ID provided")
+
+app.start(USERNAME, PASSWORD)
+
+
+async def handle_tweet(tweet: Tweet, db):
+    """Store one tweet and its media unless it is skipped.
+
+    Tweets by unknown authors, retweets and tweets that are already in the
+    database are skipped.
+
+    Args:
+        tweet: The tweet to import.
+        db: Connected database used for the lookup and the inserts.
+    """
+    if tweet.author.id not in USER_DICT:
+        console.log(
+            f"Tweet author {tweet.author.id} not in user dict. skipping",
+            style="bold red",
+        )
+        return
+
+    if tweet.is_retweet:
+        console.log(f"Tweet {tweet.id} is a retweet, skipping", style="bold red")
+        return
+
+    remote_id = f"{tweet.author.id}_{tweet.id}"
+    media_db_id = uuid4()
+    console.log(f"Processing tweet {remote_id}")
+
+    if await db.fetch_one(SELECT_QUERY_INFORMATION, {"remoteId": str(remote_id)}):
+        console.log(f"Tweet {remote_id} already in database", style="bold red")
+        return
+
+    console.log(f"Tweet {remote_id} not in database, inserting")
+
+    await db.execute(
+        INSERT_QUERY_INFORMATION,
+        {
+            "id": media_db_id,
+            "remoteId": remote_id,
+            "text": tweet.text,
+            "additionalInfo": USER_DICT[tweet.author.id],
+            "href": tweet.url,
+            "date": tweet.created_on.strftime("%Y-%m-%d %H:%M:%S"),
+        },
+    )
+
+    # Like roughly every second imported tweet; failures are only logged.
+    if random.randint(0, 100) < 50:
+        try:
+            console.log(f"Liking tweet {remote_id}")
+            app.like_tweet(tweet.id)
+        except Exception as e:
+            console.log(f"Error liking tweet: {e}", style="bold red")
+
+    if tweet.media:
+        for media in tweet.media:
+            console.log(f"Downloading media {media.id} for tweet {remote_id}")
+            # Non-blocking pause: time.sleep() would stall the event loop.
+            await asyncio.sleep(random.randint(10, 30))
+
+            thumbnail_url = media.media_url_https
+            if thumbnail_url:
+                console.log(f"Donwloading thumbnail for media {media.id}")
+                try:
+                    response = requests.get(thumbnail_url, timeout=REQUEST_TIMEOUT)
+                    # Do not store an HTTP error page as image.
+                    response.raise_for_status()
+                    filename = f"r_{uuid4()}.jpg"
+                    with open(f"/app/cdn/twitter/{filename}", "wb") as f:
+                        f.write(response.content)
+                    thumbnail_url = f"/cdn/twitter/{filename}"
+                except Exception as e:
+                    console.log(f"Error downloading thumbnail: {e}", style="bold red")
+                    continue
+
+            video_url = None
+            if media.type == "video":
+                console.log(f"Media {media.id} is a video. Downloading...")
+                try:
+                    filename = f"r_{uuid4()}.mp4"
+                    media.best_stream().download(f"/app/cdn/twitter/{filename}")
+                    video_url = f"/cdn/twitter/{filename}"
+                except Exception as e:
+                    console.log(f"Error downloading video: {e}", style="bold red")
+                    continue
+
+            await db.execute(
+                INSERT_QUERY_RESOURCE,
+                {
+                    "id": media.id,
+                    "remoteId": f"{remote_id}_{media.id}",
+                    "informationId": media_db_id,
+                    "imageUri": thumbnail_url,
+                    "videoUri": video_url,
+                },
+            )
+
+
+async def twitter():
+    """Import the current tweets of the configured list into the database."""
+    console.log("Connecting to database...", style="bold green")
+    db = Database(url=os.getenv("DATABASE_URL"))
+    await db.connect()
+
+    try:
+        console.log("Fetching last tweets of list...")
+
+        contents = app.get_list_tweets(LIST_ID)
+        for content in contents:
+            if isinstance(content, Tweet):
+                await handle_tweet(content, db)
+            if isinstance(content, SelfThread):
+                print("=== Detected a self thread")
+                for t in content.tweets:
+                    if isinstance(t, Tweet):
+                        await handle_tweet(t, db)
+                print("=== End of self thread")
+            if isinstance(content, ConversationThread):
+                print("=== Detected a conversation thread")
+                for t in content.threads:
+                    if isinstance(t, Tweet):
+                        await handle_tweet(t, db)
+                print("=== End of conversation thread")
+    finally:
+        # Close the connection even if the import fails.
+        await db.disconnect()
+
+    console.log("Done")
+
+
+asyncio.run(twitter())
diff --git a/src/psaggregator/package-lock.json b/src/psaggregator/package-lock.json
index 812592b..e4e41bf 100644
--- a/src/psaggregator/package-lock.json
+++ b/src/psaggregator/package-lock.json
@@ -1,12 +1,12 @@
 {
   "name": "psaggregator",
-  "version": "1.11.0",
+  "version": "1.12.0",
   "lockfileVersion": 3,
   "requires": true,
"packages": { "": { "name": "psaggregator", - "version": "1.11.0", + "version": "1.12.0", "dependencies": { "@internationalized/date": "^3.5.5", "bits-ui": "^0.21.13", @@ -1836,9 +1836,9 @@ } }, "node_modules/caniuse-lite": { - "version": "1.0.30001576", - "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001576.tgz", - "integrity": "sha512-ff5BdakGe2P3SQsMsiqmt1Lc8221NR1VzHj5jXN5vBny9A6fpze94HiVV/n7XRosOlsShJcvMv5mdnpjOGCEgg==", + "version": "1.0.30001663", + "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001663.tgz", + "integrity": "sha512-o9C3X27GLKbLeTYZ6HBOLU1tsAcBZsLis28wrVzddShCS16RujjHp9GDHKZqrB3meE0YjhawvMFsGb/igqiPzA==", "dev": true, "funding": [ { diff --git a/src/psaggregator/package.json b/src/psaggregator/package.json index 6daf0c7..67452c1 100644 --- a/src/psaggregator/package.json +++ b/src/psaggregator/package.json @@ -1,6 +1,6 @@ { "name": "psaggregator", - "version": "1.11.1", + "version": "1.12.0", "scripts": { "dev": "vite dev", "build": "vite build", diff --git a/src/psaggregator/src/lib/components/NewsBig.svelte b/src/psaggregator/src/lib/components/NewsBig.svelte index 3f6be77..88179cf 100644 --- a/src/psaggregator/src/lib/components/NewsBig.svelte +++ b/src/psaggregator/src/lib/components/NewsBig.svelte @@ -6,24 +6,29 @@ import { LogoYoutube, LogoTwitter, LogoInstagram, FaceDissatisfied } from "carbon-icons-svelte"; import { browser } from "$app/environment"; import { ImportType, type Information, type InformationResource } from "@prisma/client"; + import TwitterPost from "./TwitterPost.svelte"; export let youtubeCommunityPosts: Array; export let instagramPosts: Array; + export let twitterPosts: Array; const batchSize = 20; let skip = 0; let loading = { [ImportType.YouTube]: false, - [ImportType.Instagram]: false + [ImportType.Instagram]: false, + [ImportType.Twitter]: false }; + let endReached = { - [ImportType.YouTube]: youtubeCommunityPosts.length < batchSize, - [ImportType.Instagram]: 
instagramPosts.length < batchSize + [ImportType.YouTube]: youtubeCommunityPosts.length % batchSize !== 0, + [ImportType.Instagram]: instagramPosts.length % batchSize !== 0, + [ImportType.Twitter]: twitterPosts.length % batchSize !== 0 }; async function loadMore() { - for (const type of [ImportType.YouTube, ImportType.Instagram]) { + for (const type of [ImportType.YouTube, ImportType.Instagram, ImportType.Twitter]) { if (loading[type] || endReached[type]) { continue; } @@ -38,6 +43,8 @@ youtubeCommunityPosts = [...youtubeCommunityPosts, ...newInformation]; } else if (type === ImportType.Instagram) { instagramPosts = [...instagramPosts, ...newInformation]; + } else if (type === ImportType.Twitter) { + twitterPosts = [...twitterPosts, ...newInformation]; } loading[type] = false; @@ -112,31 +119,28 @@
- threads - Threads + + Twitter
-
-
- -
- Leider gibt es noch keinen Threads-Import. - Dieses Projekt ist Open Source. - Beteilige dich gerne auf - GitHub - +
+ {#each twitterPosts as twitter} + + {/each}
+ {#if loading[ImportType.Twitter]} +
loading...
+ {/if}
- - Twitter + threads + Threads
- Leider gibt es noch keinen Twitter-Import. + Leider gibt es noch keinen Threads-Import. Dieses Projekt ist Open Source. Beteilige dich gerne auf diff --git a/src/psaggregator/src/lib/components/NewsSmall.svelte b/src/psaggregator/src/lib/components/NewsSmall.svelte index f01cb8e..d055595 100644 --- a/src/psaggregator/src/lib/components/NewsSmall.svelte +++ b/src/psaggregator/src/lib/components/NewsSmall.svelte @@ -5,9 +5,11 @@ import { LogoYoutube, LogoTwitter, LogoInstagram, FaceDissatisfied } from "carbon-icons-svelte"; import { type Information, type InformationResource } from "@prisma/client"; import * as Tabs from "$lib/components/ui/tabs"; + import TwitterPost from "./TwitterPost.svelte"; export let youtubeCommunityPosts: Array; export let instagramPosts: Array; + export let twitterPosts: Array;