Skip to content

Commit

Permalink
Improve Telegram integration and session management
Browse files Browse the repository at this point in the history
  • Loading branch information
U039b committed Oct 30, 2023
1 parent 46d4887 commit a38a669
Show file tree
Hide file tree
Showing 3 changed files with 105 additions and 11 deletions.
17 changes: 17 additions & 0 deletions video_downloading_platform/core/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -581,6 +581,23 @@ def get_thumbnail_id(self):
print(e)
return None

def get_thumbnail_id_for(self, content_name):
    """Return the id of an image usable as thumbnail for ``content_name``.

    Lookup happens in two steps over ``self.downloadedcontent_set``:

    1. an image (JPEG, PNG or WebP) whose name contains ``content_name``
       without its final extension (case-insensitive);
    2. otherwise, any image that is not named ``webpage_screenshot.png``.

    Lookup errors are printed and treated as "nothing found" so that a
    missing thumbnail never interrupts the caller.

    :param content_name: file name of the content needing a thumbnail.
    :return: the id of the selected image, or ``None`` if there is none.
    """
    image_types = ['image/jpeg', 'image/png', 'image/webp']
    print(f'Search for the thumbnail of {content_name}')

    # Drop only the last extension and keep interior dots: 'a.b.mp4' must
    # become 'a.b' (not 'ab'), otherwise 'a.b.jpg' can never match. A name
    # without any dot is used as is.
    stem = ''
    if content_name:
        stem = content_name.rpartition('.')[0] or content_name

    # An empty stem would match every image, so skip the name search then.
    if stem:
        try:
            match = self.downloadedcontent_set.filter(
                mime_type__in=image_types,
                name__icontains=stem,
            ).first()
            if match:
                print(f' found {match.name}')
                return match.id
        except Exception as e:
            print(e)

    try:
        content = self.downloadedcontent_set.filter(
            mime_type__in=image_types,
        ).exclude(name='webpage_screenshot.png').first()
        if content:
            return content.id
    except Exception as e:
        print(e)
    return None


def _get_upload_dir(instance, filename):
owner_id = instance.owner.id
Expand Down
2 changes: 1 addition & 1 deletion video_downloading_platform/core/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -544,7 +544,7 @@ def index_download_request(request: DownloadRequest):
entity.md5 = content.md5
entity.sha256 = content.sha256
entity.status = request.get_status_display()
entity.thumbnail_content_id = report.get_thumbnail_id()
entity.thumbnail_content_id = report.get_thumbnail_id_for(content.name)
entity.exif = '\n'.join([f'{k}: {v}' for k, v in __parse_exif(content.exif_data).items()])
entity.content_warning = request.content_warning

Expand Down
97 changes: 87 additions & 10 deletions video_downloading_platform/core/telegram.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,76 @@
import json
import os.path
import re
import time
import random

import cv2
from telethon import TelegramClient
from telethon.tl.types import Message
from telethon.sessions.sqlite import SQLiteSession


class TelegramSessionPool:
    """Pool of Telethon session names guarded by lock files.

    A session is considered busy while a ``<session_name>.lock`` file
    exists; the file contains the ``time.time()`` value at which the lock
    was taken. Session names come from ``SQLiteSession.list_sessions()``
    and all paths are relative to the current working directory.
    """

    # Locks older than this many seconds are treated as abandoned.
    LOCK_MAX_AGE = 12 * 3600  # 12 hours

    __sessions: list[str]
    __session_states: dict

    def __init__(self):
        self.refresh_session_list()

    def session_exists(self, session_name):
        """Return True if ``session_name`` has a session file and is listed."""
        self.refresh_session_list()
        return os.path.isfile(f'{session_name}.session') and session_name in self.__sessions

    def lock_session(self, session_name):
        """Create (or overwrite) the lock file of ``session_name``."""
        with open(f'{session_name}.lock', mode='w') as lock:
            lock.write(str(time.time()))

    def _try_lock(self, session_name):
        """Atomically create the lock file; return False if it already exists."""
        try:
            # O_EXCL makes creation fail when the file exists, so two callers
            # cannot both believe they acquired the same session.
            fd = os.open(f'{session_name}.lock', os.O_CREAT | os.O_EXCL | os.O_WRONLY)
        except FileExistsError:
            return False
        with os.fdopen(fd, 'w') as lock:
            lock.write(str(time.time()))
        return True

    def release_session(self, session_name):
        """Remove the lock file of ``session_name`` if there is one.

        The lock is removed even when no ``.session`` file exists, so a lock
        taken for a brand-new session that was never created is not left
        behind.
        """
        try:
            os.unlink(f'{session_name}.lock')
        except FileNotFoundError:
            pass
        self.refresh_session_list()

    def is_locked(self, session_name):
        """Return True if a lock file exists for ``session_name``."""
        return os.path.isfile(f'{session_name}.lock')

    def print_sessions(self):
        """Print the session-name -> locked mapping from the last refresh."""
        print(self.__session_states)

    def free_old_sessions(self):
        """Delete lock files older than ``LOCK_MAX_AGE`` seconds.

        Works on the state recorded by the last ``refresh_session_list()``.
        A lock file that disappeared in the meantime is ignored; one whose
        content is not a number is aged by its modification time instead.
        """
        current_time = time.time()
        for session, locked in self.__session_states.items():
            if not locked:
                continue
            lock_path = f'{session}.lock'
            try:
                with open(lock_path) as lock:
                    raw = lock.read().strip()
                try:
                    lock_time = float(raw)
                except ValueError:
                    # Empty or corrupt content (e.g. read while being
                    # written): fall back to the file's mtime.
                    lock_time = os.path.getmtime(lock_path)
                if current_time - lock_time > self.LOCK_MAX_AGE:
                    os.unlink(lock_path)
            except FileNotFoundError:
                # Released by someone else since the last refresh.
                continue

    def get_session(self):
        """Return the name of a session that is now locked for the caller.

        Reuses the first known session whose lock could be acquired;
        otherwise a new, uniquely named session is locked and returned.
        """
        self.refresh_session_list()
        self.free_old_sessions()
        self.refresh_session_list()
        try:
            for session, locked in self.__session_states.items():
                # _try_lock re-checks atomically: the recorded state may be
                # stale if another caller grabbed the session meanwhile.
                if not locked and self._try_lock(session):
                    return session

            session = f'annon_bot-{time.time_ns()}'
            self.lock_session(session)
            return session
        finally:
            self.refresh_session_list()

    def refresh_session_list(self):
        """Reload the session names and record whether each one is locked."""
        self.__sessions = SQLiteSession.list_sessions()
        self.__session_states = {s: self.is_locked(s) for s in self.__sessions}



class TelegramPostDownloader:
Expand All @@ -18,6 +84,7 @@ class TelegramPostDownloader:
bot_token: str
__session_id: str = 'anon_bot'
__client: TelegramClient
__session_pool: TelegramSessionPool
url_regex = r"^https:\/\/t\.me\/(?P<user_id>.*?)\/(?P<post_id>[0-9]+)"

def __init__(self, api_id: int, api_hash: str, bot_token: str):
Expand All @@ -26,7 +93,16 @@ def __init__(self, api_id: int, api_hash: str, bot_token: str):
self.bot_token = bot_token

def login(self):
    """Acquire a pooled session and start the Telegram client with it.

    The session name obtained from the pool is stored on the instance. If
    starting the client fails, the session is released and the original
    exception is re-raised.
    """
    # Random short delay before touching the pool; presumably meant to
    # spread out simultaneous logins - confirm with the callers.
    time.sleep(random.uniform(0.2, 2.1))
    self.__session_pool = TelegramSessionPool()
    # get_session() hands back a session that is already locked, so no
    # second lock_session() call is needed here.
    self.__session_id = self.__session_pool.get_session()
    print(f'TG log in with session [{self.__session_id}]')
    try:
        self.__client = TelegramClient(self.__session_id, self.api_id, self.api_hash).start(bot_token=self.bot_token)
    except Exception:
        self.__session_pool.release_session(self.__session_id)
        # Bare raise keeps the original traceback intact.
        raise

def prepare(self, url: str) -> bool:
self.url = url
Expand All @@ -37,10 +113,6 @@ def prepare(self, url: str) -> bool:
return True
return False

@staticmethod
def __download(client: TelegramClient, user_id: str, post_id: int, output_dir: str):
    """Placeholder without implementation: does nothing and returns ``None``."""
    return None

async def get_message(self):
messages = []
message: Message = await self.__client.get_messages(self.user_id, ids=self.post_id)
Expand All @@ -67,7 +139,7 @@ async def get_message(self):
return messages

def __create_thumbnails(self):
for filename in glob.glob(f'{self.output_dir}/*.mp4'):
for filename in glob.glob(f'{self.output_dir}/*.mp4')+glob.glob(f'{self.output_dir}/*.MP4'):
root, ext = os.path.splitext(filename)
print(f'Creating thumbnail for {filename}: {root}|{ext}')
cap = None
Expand All @@ -93,8 +165,13 @@ async def __download_message(self, message: Message):

def download(self, output_dir: str):
    """Download the prepared post into ``output_dir`` and build thumbnails.

    The messages returned by ``get_message()`` are downloaded one by one
    inside the client context. The pooled session is released whether or
    not the download succeeds; thumbnails are only created after a
    successful download.

    :param output_dir: directory stored on the instance as the download target.
    """
    self.output_dir = output_dir
    try:
        with self.__client:
            loop = self.__client.loop
            messages: list[Message] = loop.run_until_complete(self.get_message())
            for message in messages:
                loop.run_until_complete(self.__download_message(message))
    finally:
        # Always give the session back, even if the download raised.
        print(f'Release TG session [{self.__session_id}]')
        self.__session_pool.release_session(self.__session_id)
    self.__create_thumbnails()

0 comments on commit a38a669

Please sign in to comment.