Skip to content

Commit

Permalink
chg: [chats] translate messages on demand
Browse files Browse the repository at this point in the history
  • Loading branch information
Terrtia committed Dec 4, 2023
1 parent 941838a commit bef4e69
Show file tree
Hide file tree
Showing 11 changed files with 205 additions and 21 deletions.
88 changes: 88 additions & 0 deletions bin/lib/Language.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,20 @@
import os
import sys

import cld3
from libretranslatepy import LibreTranslateAPI

sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from lib.ConfigLoader import ConfigLoader

config_loader = ConfigLoader()
TRANSLATOR_URL = config_loader.get_config_str('Translation', 'libretranslate')
config_loader = None


dict_iso_languages = {
'af': 'Afrikaans',
'am': 'Amharic',
Expand Down Expand Up @@ -237,3 +251,77 @@ def get_iso_from_languages(l_languages, sort=False):
if sort:
l_iso = sorted(l_iso)
return l_iso


class LanguageDetector:
    """Placeholder for a dedicated language detector; it has no behavior yet."""

def get_translator_instance():
    """Return the translator URL read from the ``[Translation] libretranslate`` config entry."""
    return TRANSLATOR_URL

class LanguageTranslator:
    """Translate and detect languages through a LibreTranslate API client.

    Long contents (>= 200 characters) are detected locally with cld3,
    shorter ones are sent to the LibreTranslate ``detect`` endpoint.
    All remote calls are best effort: a failing service yields an empty
    result (``[]`` / ``None``) instead of raising.
    """

    def __init__(self):
        self.lt = LibreTranslateAPI(get_translator_instance())

    def languages(self):
        """Return the languages supported by the translation service.

        :return: list of ``{'iso': <code>, 'language': <name>}`` dicts.
                 Entries collected before a failure are kept; the list is
                 empty if the service cannot be queried at all.
        """
        languages = []
        try:
            for dict_lang in self.lt.languages():
                languages.append({'iso': dict_lang['code'], 'language': dict_lang['name']})
        except Exception:
            # Best effort: the service may be unreachable or not configured.
            # ``Exception`` (not a bare except) so that KeyboardInterrupt and
            # SystemExit still propagate.
            pass
        return languages

    def detect_cld3(self, content):
        """Return the language code of the most frequent language found by cld3, or None."""
        for lang in cld3.get_frequent_languages(content, num_langs=1):
            return lang.language
        return None

    def detect_libretranslate(self, content):
        """Return the first language code reported by the remote ``detect`` call, or None."""
        try:
            detected = self.lt.detect(content)
        except Exception:  # TODO ERROR MESSAGE
            return None
        if detected:
            return detected[0].get('language')
        return None

    def detect(self, content):  # TODO replace by gcld3
        """Detect the language of ``content``.

        cld3 is used for contents of at least 200 characters, the
        LibreTranslate service for anything shorter.

        :return: language code, or None if detection failed.
        """
        if len(content) >= 200:
            return self.detect_cld3(content)
        return self.detect_libretranslate(content)

    def translate(self, content, source=None, target="en"):  # TODO source target
        """Translate ``content`` from ``source`` to ``target``.

        :param content: text to translate
        :param source: source language code; detected from ``content`` when falsy
        :param target: target language code
        :return: the translated text, or None when there is nothing to
                 translate (empty content, unknown source, source equal to
                 target), when the service call fails, or when the
                 translation is identical to the input.
        """
        if not content:
            return None
        if not source:
            source = self.detect(content)
        if not source or source == target:
            return None
        try:
            translation = self.lt.translate(content, source, target)
        except Exception:
            # TODO LOG and display error
            return None
        # An unchanged text carries no information for the caller.
        if translation == content:
            return None
        return translation


# Supported languages, fetched once when the module is imported.
LIST_LANGUAGES = LanguageTranslator().languages()


def get_translation_languages():
    """Return the list of translation languages fetched at import time."""
    return LIST_LANGUAGES


if __name__ == '__main__':
    # Manual check: build a translator and query its supported languages.
    t_content = ''
    translator = LanguageTranslator()
    # translator.translate(t_content, source='ru')
    translator.languages()
21 changes: 15 additions & 6 deletions bin/lib/chats_viewer.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from lib.objects import ChatSubChannels
from lib.objects import ChatThreads
from lib.objects import Messages
from lib.objects import UsersAccount
from lib.objects import Usernames

config_loader = ConfigLoader()
Expand Down Expand Up @@ -307,7 +308,7 @@ def api_get_chat_service_instance(chat_instance_uuid):
return {"status": "error", "reason": "Unknown uuid"}, 404
return chat_instance.get_meta({'chats'}), 200

def api_get_chat(chat_id, chat_instance_uuid):
def api_get_chat(chat_id, chat_instance_uuid, translation_target=None):
chat = Chats.Chat(chat_id, chat_instance_uuid)
if not chat.exists():
return {"status": "error", "reason": "Unknown chat"}, 404
Expand All @@ -317,7 +318,7 @@ def api_get_chat(chat_id, chat_instance_uuid):
if meta['subchannels']:
meta['subchannels'] = get_subchannels_meta_from_global_id(meta['subchannels'])
else:
meta['messages'], meta['tags_messages'] = chat.get_messages()
meta['messages'], meta['tags_messages'] = chat.get_messages(translation_target=translation_target)
return meta, 200

def api_get_nb_message_by_week(chat_id, chat_instance_uuid):
Expand All @@ -328,7 +329,7 @@ def api_get_nb_message_by_week(chat_id, chat_instance_uuid):
# week = chat.get_nb_message_by_week('20231109')
return week, 200

def api_get_subchannel(chat_id, chat_instance_uuid):
def api_get_subchannel(chat_id, chat_instance_uuid, translation_target=None):
subchannel = ChatSubChannels.ChatSubChannel(chat_id, chat_instance_uuid)
if not subchannel.exists():
return {"status": "error", "reason": "Unknown subchannel"}, 404
Expand All @@ -339,17 +340,17 @@ def api_get_subchannel(chat_id, chat_instance_uuid):
meta['threads'] = get_threads_metas(meta['threads'])
if meta.get('username'):
meta['username'] = get_username_meta_from_global_id(meta['username'])
meta['messages'], meta['tags_messages'] = subchannel.get_messages()
meta['messages'], meta['tags_messages'] = subchannel.get_messages(translation_target=translation_target)
return meta, 200

def api_get_thread(thread_id, thread_instance_uuid, translation_target=None):
    """Return ``(meta, 200)`` for a thread, or an error dict with 404 if it does not exist.

    ``translation_target`` is forwarded to ``get_messages``.
    """
    chat_thread = ChatThreads.ChatThread(thread_id, thread_instance_uuid)
    if not chat_thread.exists():
        return {"status": "error", "reason": "Unknown thread"}, 404

    thread_meta = chat_thread.get_meta({'chat', 'nb_messages'})
    # if meta['chat']:
    #     meta['chat'] = get_chat_meta_from_global_id(meta['chat'])
    messages, tags_messages = chat_thread.get_messages(translation_target=translation_target)
    thread_meta['messages'] = messages
    thread_meta['tags_messages'] = tags_messages
    return thread_meta, 200

def api_get_message(message_id):
Expand All @@ -362,6 +363,14 @@ def api_get_message(message_id):
# # meta['chat'] =
return meta, 200

def api_get_user_account(user_id, instance_uuid):
    """Return ``(meta, 200)`` for a user account, or an error dict with 404 if it does not exist.

    The returned meta is requested with the ``icon`` and ``username`` options.
    """
    user_account = UsersAccount.UserAccount(user_id, instance_uuid)
    if not user_account.exists():
        return {"status": "error", "reason": "Unknown user-account"}, 404
    # The debug print of the account meta was removed: it dumped account
    # metadata to stdout on every call.
    meta = user_account.get_meta({'icon', 'username'})
    return meta, 200

# # # # # # # # # # LATER
# #
# ChatCategory #
Expand Down
31 changes: 26 additions & 5 deletions bin/lib/objects/Messages.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from lib.ail_core import get_ail_uuid
from lib.objects.abstract_object import AbstractObject
from lib.ConfigLoader import ConfigLoader
from lib import Language
from lib.objects import UsersAccount
from lib.data_retention_engine import update_obj_date, get_obj_date_first
# TODO Set all messages ???
Expand Down Expand Up @@ -76,7 +77,13 @@ def get_content(self, r_type='str'): # TODO ADD cache # TODO Compress content ??
"""
Returns content
"""
content = self._get_field('content')
global_id = self.get_global_id()
content = r_cache.get(f'content:{global_id}')
if not content:
content = self._get_field('content')
if content:
r_cache.set(f'content:{global_id}', content)
r_cache.expire(f'content:{global_id}', 300)
if r_type == 'str':
return content
elif r_type == 'bytes':
Expand Down Expand Up @@ -153,11 +160,23 @@ def add_reaction(self, reactions, nb_reaction):
# message from channel ???
# message media

def get_translation(self, content=None, source=None, target='fr'):
    """
    Returns translated content

    The translation is looked up in the cache first; on a miss it is
    computed on demand and cached for 300 seconds.

    :param content: message content; loaded with get_content() when falsy
    :param source: source language code, detected by the translator when falsy
    :param target: target language code
    :return: translated content, or a falsy value if no translation is available
    """
    # return self._get_field('translated')
    global_id = self.get_global_id()
    cache_key = f'translation:{target}:{global_id}'
    # A previous `expire(key, 0)` here deleted the entry right after reading
    # it, so the cached value was dropped on every hit. The entry now lives
    # for the TTL set when it was stored.
    translation = r_cache.get(cache_key)
    if translation:
        return translation
    if not content:
        content = self.get_content()
    translation = Language.LanguageTranslator().translate(content, source=source, target=target)
    if translation:
        r_cache.set(cache_key, translation)
        r_cache.expire(cache_key, 300)
    return translation

def _set_translation(self, translation):
"""
Expand Down Expand Up @@ -209,7 +228,7 @@ def get_misp_object(self): # TODO
# return r_object.hget(f'meta:item::{self.id}', 'url')

# options: set of optional meta fields
def get_meta(self, options=None, timestamp=None):
def get_meta(self, options=None, timestamp=None, translation_target='en'):
"""
:type options: set
:type timestamp: float
Expand Down Expand Up @@ -239,7 +258,7 @@ def get_meta(self, options=None, timestamp=None):
parent_type, _, parent_id = meta['parent'].split(':', 3)
if parent_type == 'message':
message = Message(parent_id)
meta['reply_to'] = message.get_meta(options=options)
meta['reply_to'] = message.get_meta(options=options, translation_target=translation_target)
if 'investigations' in options:
meta['investigations'] = self.get_investigations()
if 'link' in options:
Expand All @@ -262,6 +281,8 @@ def get_meta(self, options=None, timestamp=None):
meta['files-names'] = self.get_files_names()
if 'reactions' in options:
meta['reactions'] = self.get_reactions()
if 'translation' in options and translation_target:
meta['translation'] = self.get_translation(content=meta.get('content'), target=translation_target)

# meta['encoding'] = None
return meta
Expand Down
8 changes: 4 additions & 4 deletions bin/lib/objects/abstract_chat_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,12 +179,12 @@ def get_nb_message_this_week(self):
week_date = Date.get_current_week_day()
return self.get_nb_message_by_week(week_date)

def get_message_meta(self, message, timestamp=None, translation_target='en'):  # TODO handle file message
    """Build the meta dict of one message, including its translation to ``translation_target``."""
    meta_options = {'content', 'files-names', 'images', 'link', 'parent', 'parent_meta', 'reactions', 'thread', 'translation', 'user-account'}
    # Drops the first 9 characters of the id -- presumably a 'message::' prefix, TODO confirm
    message_obj = Messages.Message(message[9:])
    return message_obj.get_meta(options=meta_options, timestamp=timestamp, translation_target=translation_target)

def get_messages(self, start=0, page=1, nb=500, unread=False): # threads ???? # TODO ADD last/first message timestamp + return page
def get_messages(self, start=0, page=1, nb=500, unread=False, translation_target='en'): # threads ???? # TODO ADD last/first message timestamp + return page
# TODO return message meta
tags = {}
messages = {}
Expand All @@ -195,7 +195,7 @@ def get_messages(self, start=0, page=1, nb=500, unread=False): # threads ???? #
if date_day != curr_date:
messages[date_day] = []
curr_date = date_day
mess_dict = self.get_message_meta(message[0], timestamp=timestamp)
mess_dict = self.get_message_meta(message[0], timestamp=timestamp, translation_target=translation_target)
messages[date_day].append(mess_dict)

if mess_dict.get('tags'):
Expand Down
3 changes: 3 additions & 0 deletions configs/core.cfg.sample
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,9 @@ default_har = True
default_screenshot = True
onion_proxy = onion.foundation

[Translation]
libretranslate =

[IP]
# list of comma-separated CIDR that you wish to be alerted for. e.g:
#networks = 192.168.34.0/24,10.0.0.0/8,192.168.33.0/24
Expand Down
20 changes: 14 additions & 6 deletions var/www/blueprints/chats_explorer.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
##################################
from lib import ail_core
from lib import chats_viewer
from lib import Language
from lib import Tag

# ============ BLUEPRINT ============
Expand Down Expand Up @@ -80,12 +81,14 @@ def chats_explorer_instance():
def chats_explorer_chat():
    """Render the chat viewer, or return the API error as a JSON response."""
    args = request.args
    target = args.get('target')
    res, status_code = chats_viewer.api_get_chat(args.get('id'), args.get('uuid'), translation_target=target)
    if status_code != 200:
        return create_json_response(res, status_code)
    return render_template('chat_viewer.html', chat=res, bootstrap_label=bootstrap_label,
                           translation_languages=Language.get_translation_languages(),
                           translation_target=target)

@chats_explorer.route("chats/explorer/messages/stats/week", methods=['GET'])
@login_required
Expand All @@ -105,25 +108,29 @@ def chats_explorer_messages_stats_week():
def objects_subchannel_messages():
    """Render the messages of a subchannel, or return the API error as a JSON response."""
    args = request.args
    target = args.get('target')
    res, status_code = chats_viewer.api_get_subchannel(args.get('id'), args.get('uuid'), translation_target=target)
    if status_code != 200:
        return create_json_response(res, status_code)
    return render_template('SubChannelMessages.html', subchannel=res, bootstrap_label=bootstrap_label,
                           translation_languages=Language.get_translation_languages(),
                           translation_target=target)

@chats_explorer.route("/chats/explorer/thread", methods=['GET'])
@login_required
@login_read_only
def objects_thread_messages():
    """Render the messages of a thread, or return the API error as a JSON response."""
    args = request.args
    target = args.get('target')
    res, status_code = chats_viewer.api_get_thread(args.get('id'), args.get('uuid'), translation_target=target)
    if status_code != 200:
        return create_json_response(res, status_code)
    return render_template('ThreadMessages.html', meta=res, bootstrap_label=bootstrap_label,
                           translation_languages=Language.get_translation_languages(),
                           translation_target=target)

@chats_explorer.route("/objects/message", methods=['GET'])
@login_required
Expand All @@ -135,5 +142,6 @@ def objects_message():
return create_json_response(message[0], message[1])
else:
message = message[0]
languages = Language.get_translation_languages()
return render_template('ChatMessage.html', meta=message, bootstrap_label=bootstrap_label,
modal_add_tags=Tag.get_modal_add_tags(message['id'], object_type='message'))
3 changes: 3 additions & 0 deletions var/www/templates/chats_explorer/SubChannelMessages.html
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,9 @@ <h3 class="text-secondary">{% if subchannel['chat']['name'] %}{{ subchannel['cha
<span class="mt-3">
{% include 'objects/image/block_blur_img_slider.html' %}
</span>
{% with translate_url=url_for('chats_explorer.objects_subchannel_messages', uuid=subchannel['subtype']), obj_id=subchannel['id'] %}
{% include 'chats_explorer/block_translation.html' %}
{% endwith %}

<div class="position-relative">
<div class="chat-messages p-2">
Expand Down
3 changes: 3 additions & 0 deletions var/www/templates/chats_explorer/ThreadMessages.html
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,9 @@
<span class="mt-3">
{% include 'objects/image/block_blur_img_slider.html' %}
</span>
{% with translate_url=url_for('chats_explorer.objects_thread_messages', uuid=meta['subtype']), obj_id=meta['id'] %}
{% include 'chats_explorer/block_translation.html' %}
{% endwith %}

<div class="position-relative">
<div class="chat-messages p-2">
Expand Down
8 changes: 8 additions & 0 deletions var/www/templates/chats_explorer/block_message.html
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,10 @@
{% endif %}
</div>
<pre class="my-0">{{ message['reply_to']['content'] }}</pre>
{% if message['reply_to']['translation'] %}
<hr class="m-1">
<div class="my-0 text-secondary">{{ message['reply_to']['translation'] }}</div>
{% endif %}
{% for tag in message['reply_to']['tags'] %}
<span class="badge badge-{{ bootstrap_label[loop.index0 % 5] }}">{{ tag }}</span>
{% endfor %}
Expand All @@ -71,6 +75,10 @@
{% endfor %}
{% endif %}
<pre class="my-0">{{ message['content'] }}</pre>
{% if message['translation'] %}
<hr class="m-1">
<pre class="my-0 text-secondary">{{ message['translation'] }}</pre>
{% endif %}
{% for reaction in message['reactions'] %}
<span class="border rounded px-1">{{ reaction }} {{ message['reactions'][reaction] }}</span>
{% endfor %}
Expand Down
Loading

0 comments on commit bef4e69

Please sign in to comment.