Merge pull request #343 from hellohaptik/develop

Develop to Master
hellohaptik · Feb 20, 2020 · da12170 · da12170
2 parents aec491a + 9ec72ba
commit da12170
Show file tree

Hide file tree

Showing 28 changed files with 102 additions and 31 deletions.
diff --git a/datastore/elastic_search/create.py b/datastore/elastic_search/create.py
@@ -1,4 +1,4 @@
-from utils import filter_kwargs
+from .utils import filter_kwargs
 
 log_prefix = 'datastore.elastic_search.create'
 

diff --git a/datastore/elastic_search/populate.py b/datastore/elastic_search/populate.py
@@ -14,6 +14,7 @@
 from external_api.constants import SENTENCE, ENTITIES
 from language_utilities.constant import ENGLISH_LANG
 from ner_constants import DICTIONARY_DATA_VARIANTS
+from six.moves import map
 
 # Local imports
 
@@ -106,7 +107,7 @@ def get_variants_dictionary_value_from_key(csv_file_path, dictionary_key, logger
         next(csv_reader)
         for data_row in csv_reader:
             try:
-                data = map(str.strip, data_row[1].split('|'))
+                data = list(map(str.strip, data_row[1].split('|')))
                 # remove empty strings
                 data = [variant for variant in data if variant]
                 dictionary_value[data_row[0].strip().replace('.', ' ')].extend(data)

diff --git a/datastore/elastic_search/query.py b/datastore/elastic_search/query.py
@@ -13,6 +13,8 @@
 from external_api.constants import SENTENCE, ENTITIES
 from language_utilities.constant import ENGLISH_LANG
 from lib.nlp.const import TOKENIZER
+from six.moves import range
+from six.moves import zip
 
 # Local imports
 

diff --git a/datastore/elastic_search/transfer.py b/datastore/elastic_search/transfer.py
@@ -202,7 +202,7 @@ def _scroll_over_es_return_object(self, results):
             total_records = results['hits']['total']
             for post in results['hits']['hits']:
                 data_new.append(post)
-            if '_scroll_id' in results.keys():
+            if '_scroll_id' in results:
                 scroll_size = len(results['hits']['hits'])
                 while (scroll_size > 0):
                     scroll_id = results['_scroll_id']
@@ -411,7 +411,7 @@ def fetch_index_alias_points_to(self, es_url, alias_name):
         response = requests.get(es_url + '/*/_alias/' + alias_name)
         if response.status_code == 200:
             json_obj = json.loads(response.content)
-            indices = json_obj.keys()
+            indices = list(json_obj.keys())
             if self.es_index_1 in indices:
                 return self.es_index_1
             elif self.es_index_2 in indices:

diff --git a/docker/Dockerfile-python3 b/docker/Dockerfile-python3
@@ -0,0 +1,44 @@
+# This automates the chatbot_ner installation
+
+FROM python:3.6.10
+
+RUN apt-get update && apt-get install -y wget build-essential curl nginx supervisor
+
+WORKDIR /app
+
+
+COPY docker/install.sh initial_setup.py /app/
+COPY docker/supervisord.conf /etc/supervisor/conf.d/supervisord.conf
+
+# cython is installed because the pandas build fails otherwise
+RUN mkdir -p ~/model_lib && \
+    mkdir -p /root/models && \
+    /app/install.sh && \
+    touch /app/config && \
+    touch /app/model_config && \
+    pip install --no-cache-dir -I uwsgi && \
+    pip install cython
+
+COPY requirements.txt /app/requirements.txt
+
+RUN pip install --no-cache-dir -r /app/requirements.txt
+
+# From start_server.sh
+
+ENV NAME="chatbot_ner"
+ENV DJANGODIR=/app
+ENV NUM_WORKERS=4
+ENV DJANGO_SETTINGS_MODULE=chatbot_ner.settings
+ENV PORT=8081
+ENV TIMEOUT=600
+ENV DEBIAN_FRONTEND=noninteractive
+
+# Important: change this via .env (the file copied from config.example)
+ENV SECRET_KEY=!yqqcz-v@(s@kpygpvomcuu3il0q1&qtpz)e_g0ulo-sdv%c0c
+
+EXPOSE 8081
+
+ADD . /app
+
+# entrypoint/cmd script
+CMD /app/docker/cmd.sh
diff --git a/lib/nlp/levenshtein_distance.py b/lib/nlp/levenshtein_distance.py
@@ -1,3 +1,5 @@
+from six.moves import range
+
 def edit_distance(string1, string2, insertion_cost=1, deletion_cost=1, substitution_cost=2, max_distance=None):
     """
     Calculate the weighted levenshtein distance between two strings
@@ -30,7 +32,7 @@ def edit_distance(string1, string2, insertion_cost=1, deletion_cost=1, substitut
 
     if len(string1) > len(string2):
         string1, string2 = string2, string1
-    distances = range(len(string1) + 1)
+    distances = list(range(len(string1) + 1))
     for index2, char2 in enumerate(string2):
         new_distances = [index2 + 1]
         for index1, char1 in enumerate(string1):

diff --git a/models/crf/test.py b/models/crf/test.py
@@ -6,6 +6,7 @@
 from .constant import INBOUND, OUTBOUND
 from .output_generation.city import generate_city_output
 from .output_generation.date import generate_date_output
+from six.moves import range
 
 try:
     import CRFPP

diff --git a/models/crf_v2/crf_detect_entity.py b/models/crf_v2/crf_detect_entity.py
@@ -3,6 +3,7 @@
 from .get_crf_tagger import CrfModel
 from chatbot_ner.config import CRF_MODELS_PATH
 from models.crf_v2.constants import CRF_B_LABEL, CRF_I_LABEL
+from six.moves import range
 
 
 class CrfDetection(object):

diff --git a/models/crf_v2/crf_preprocess_data.py b/models/crf_v2/crf_preprocess_data.py
@@ -6,6 +6,8 @@
 from chatbot_ner.config import ner_logger
 from models.crf_v2.constants import SENTENCE_LIST, CRF_WORD_EMBEDDINGS, CRF_WORD_VEC_FEATURE, CRF_B_LABEL,\
     CRF_B_TAG, CRF_I_LABEL, CRF_I_TAG, CRF_POS_TAGS, CRF_LABELS, CRF_O_LABEL, CRF_BOS, CRF_EOS
+from six.moves import range
+from six.moves import zip
 
 
 class CrfPreprocessData(object):

diff --git a/models/crf_v2/crf_train.py b/models/crf_v2/crf_train.py
@@ -8,6 +8,7 @@
     ESCrfTrainingTextListNotFoundException
 from datetime import datetime
 import os
+from six.moves import zip
 
 
 class CrfTrain(object):

diff --git a/ner_v1/chatbot/combine_detection_logic.py b/ner_v1/chatbot/combine_detection_logic.py
@@ -92,7 +92,7 @@ def combine_output_of_detection_logic_and_tag(entity_data, text):
         else:
             final_entity_data[entity] = None
 
-    original_text_list = tag_preprocess_dict.keys()
+    original_text_list = list(tag_preprocess_dict.keys())
     original_text_list = sort_original_text(original_text_list)
     for original_text in original_text_list:
         tag = ''

diff --git a/ner_v1/detectors/base_detector.py b/ner_v1/detectors/base_detector.py
@@ -11,6 +11,7 @@
                            FROM_FALLBACK_VALUE, ORIGINAL_TEXT, ENTITY_VALUE, DETECTION_METHOD,
                            DETECTION_LANGUAGE, ENTITY_VALUE_DICT_KEY)
 from ner_v1.constant import DATASTORE_VERIFIED, MODEL_VERIFIED
+from six.moves import range
 
 try:
     import regex as re

diff --git a/ner_v1/detectors/numeral/budget/budget_detection.py b/ner_v1/detectors/numeral/budget/budget_detection.py
@@ -4,6 +4,7 @@
 from ner_v1.constant import BUDGET_TYPE_NORMAL, BUDGET_TYPE_TEXT
 from ner_v1.detectors.base_detector import BaseDetector
 from ner_v1.detectors.textual.text.text_detection import TextDetector
+from six.moves import zip
 
 
 class BudgetDetector(BaseDetector):
@@ -119,9 +120,9 @@ def __init__(self, entity_name, source_language_script=ENGLISH_LANG, translation
         self.tag = '__' + self.entity_name + '__'
         self._use_text_detection = use_text_detection
 
-        units, scales = zip(*sorted(
+        units, scales = list(zip(*sorted(
             list(BudgetDetector._scale_patterns.items()), key=lambda pattern_scale: len(pattern_scale[0]), reverse=True
-        ))
+        )))
         self._scale_compiled_patterns = [(scale, re.compile(unit)) for scale, unit in zip(scales, units)]
         digits_pattern = r'((?:\d+(?:\,\d+)*(?:\.\d+)?)|(?:(?:\d+(?:\,\d+)*)?(?:\.\d+)))'
         units_pattern = r'({})?'.format('|'.join(units))

diff --git a/ner_v1/detectors/temporal/date/date_detection.py b/ner_v1/detectors/temporal/date/date_detection.py
@@ -15,6 +15,8 @@
                              TYPE_THIS_DAY, TYPE_PAST,
                              TYPE_POSSIBLE_DAY, TYPE_REPEAT_DAY, WEEKDAYS, WEEKENDS, REPEAT_WEEKDAYS,
                              REPEAT_WEEKENDS, MONTH_DICT, DAY_DICT, TYPE_N_DAYS_AFTER)
+from six.moves import range
+from six.moves import zip
 
 
 class DateAdvancedDetector(object):

diff --git a/ner_v1/detectors/temporal/time/time_detection.py b/ner_v1/detectors/temporal/time/time_detection.py
@@ -1459,7 +1459,7 @@ def _remove_time_range_entities(self, time_list, original_list):
         time_list_final = []
         original_list_final = []
         for i, entity in enumerate(time_list):
-            if 'range' not in entity.keys():
+            if 'range' not in entity:
                 time_list_final.append(entity)
                 original_list_final.append(original_list[i])
             elif not entity['range']:

diff --git a/ner_v1/detectors/textual/name/name_detection.py b/ner_v1/detectors/textual/name/name_detection.py
@@ -11,6 +11,7 @@
                                                        HINDI_STOPWORDS, NAME_VARIATIONS,
                                                        COMMON_HINDI_WORDS_OCCURING_WITH_NAME)
 from ner_v1.detectors.textual.text.text_detection import TextDetector
+from six.moves import range
 
 
 # TODO: Refactor this module for readability and useability. Remove any hacks
@@ -110,7 +111,7 @@ def get_name_using_pos_tagger(self, text):
 
         entity_value, original_text = [], []
         pos_tagger_object = POS()
-        pattern1 = re.compile(r"name\s*(is|)\s*([\w\s]+)")
+        pattern1 = re.compile(r"name\s+(?:is\s+)?([\w\s]+)")
         pattern2 = re.compile(r"myself\s+([\w\s]+)")
         pattern3 = re.compile(r"call\s+me\s+([\w\s]+)")
         pattern4 = re.compile(r"i\s+am\s+([\w\s]+)")
@@ -128,7 +129,7 @@ def get_name_using_pos_tagger(self, text):
             return entity_value, original_text
 
         if pattern1_match:
-            entity_value, original_text = self.get_format_name(pattern1_match[0][1].split(), self.text)
+            entity_value, original_text = self.get_format_name(pattern1_match[0].split(), self.text)
 
         elif pattern2_match:
             entity_value, original_text = self.get_format_name(pattern2_match[0].split(), self.text)
@@ -521,7 +522,7 @@ def remove_emojis(self, text):
         Returns:
             text (str): text with emojis replaced with ''
         """
-        emoji_pattern = re.compile(ur'[{0}]+'.format(''.join(EMOJI_RANGES.values())), re.UNICODE)
+        emoji_pattern = re.compile(ur'[{0}]+'.format(''.join(list(EMOJI_RANGES.values()))), re.UNICODE)
         text = emoji_pattern.sub(repl='', string=text)
         return text
 

diff --git a/ner_v1/detectors/textual/name/tests/test_name_detection.py b/ner_v1/detectors/textual/name/tests/test_name_detection.py
@@ -7,6 +7,8 @@
 
 from ner_v1.constant import DATASTORE_VERIFIED, MODEL_VERIFIED
 from ner_v1.detectors.textual.name.name_detection import NameDetector
+from six.moves import range
+from six.moves import zip
 
 
 class NameDetectionTest(TestCase):
@@ -65,7 +67,7 @@ def test_person_name_detection(self):
             for d in detected_texts:
                 d.pop(MODEL_VERIFIED)
                 d.pop(DATASTORE_VERIFIED)
-            zipped = zip(detected_texts, original_texts)
+            zipped = list(zip(detected_texts, original_texts))
             self.assertEqual(expected_value, zipped)
 
     def generate_person_name_dict(self, person_name_dict):

diff --git a/ner_v1/detectors/textual/text/text_detection.py b/ner_v1/detectors/textual/text/text_detection.py
@@ -11,6 +11,7 @@
 from lib.nlp.levenshtein_distance import edit_distance
 from ner_v1.detectors.base_detector import BaseDetector
 from ner_constants import ENTITY_VALUE_DICT_KEY
+from six.moves import range
 
 try:
     import regex as re
@@ -445,7 +446,7 @@ def _text_detection_with_variants(self):
                     variant = variant.decode('utf-8')
 
                 variants_to_values[variant] = value
-            variants_list = variants_to_values.keys()
+            variants_list = list(variants_to_values.keys())
 
             # Length based ordering, this reorders the results from datastore
             # that are already sorted by some relevance scoring

diff --git a/ner_v2/detectors/numeral/number/number_detection.py b/ner_v2/detectors/numeral/number/number_detection.py
@@ -6,6 +6,7 @@
 from ner_v2.detectors.base_detector import BaseDetector
 from ner_v2.detectors.numeral.constant import NUMBER_DETECTION_RETURN_DICT_VALUE, NUMBER_DETECTION_RETURN_DICT_UNIT
 from ner_v2.detectors.utils import get_lang_data_path
+from six.moves import zip
 
 
 class NumberDetector(BaseDetector):

diff --git a/ner_v2/detectors/numeral/number/standard_number_detector.py b/ner_v2/detectors/numeral/number/standard_number_detector.py
@@ -2,6 +2,7 @@
 import pandas as pd
 import collections
 import os
+from six.moves import zip
 
 try:
     import regex as re
@@ -51,10 +52,10 @@ def __init__(self, entity_name, data_directory_path, unit_type=None):
         # Method to initialise value in regex
         self.init_regex_and_parser(data_directory_path)
 
-        sorted_len_units_keys = sorted(self.units_map.keys(), key=len, reverse=True)
+        sorted_len_units_keys = sorted(list(self.units_map.keys()), key=len, reverse=True)
         self.unit_choices = "|".join([re.escape(x) for x in sorted_len_units_keys])
 
-        sorted_len_scale_map = sorted(self.scale_map.keys(), key=len, reverse=True)
+        sorted_len_scale_map = sorted(list(self.scale_map.keys()), key=len, reverse=True)
         # using re.escape for strict matches in case pattern comes with '.' or '*', which should be escaped
         self.scale_map_choices = "|".join([re.escape(x) for x in sorted_len_scale_map])
 

diff --git a/ner_v2/detectors/numeral/number_range/standard_number_range_detector.py b/ner_v2/detectors/numeral/number_range/standard_number_range_detector.py
@@ -7,6 +7,8 @@
 import ner_v2.detectors.numeral.constant as numeral_constant
 from ner_v2.detectors.numeral.utils import get_list_from_pipe_sep_string
 from ner_v2.detectors.numeral.number.number_detection import NumberDetector
+from six.moves import zip
+
 try:
     import regex as re
     _re_flags = re.UNICODE | re.V1 | re.WORD
@@ -126,7 +128,8 @@ def _tag_number_in_text(self, processed_text):
             i want to buy __number__1 apples and more than __number__0 bananas
         """
         tagged_number_text = processed_text
-        sorted_number_detected_map = sorted(self.number_detected_map.items(), key=lambda kv: len(kv[1].original_text),
+        sorted_number_detected_map = sorted(list(self.number_detected_map.items()),
+                                            key=lambda kv: len(kv[1].original_text),
                                             reverse=True)
         for number_tag in sorted_number_detected_map:
             tagged_number_text = tagged_number_text.replace(number_tag[1].original_text, number_tag[0], 1)

diff --git a/ner_v2/detectors/numeral/utils.py b/ner_v2/detectors/numeral/utils.py
@@ -1,5 +1,5 @@
 import re
-
+from six.moves import range
 
 def get_number_from_number_word(text, number_word_dict):
     """

diff --git a/ner_v2/detectors/pattern/phone_number/phone_number_detection.py b/ner_v2/detectors/pattern/phone_number/phone_number_detection.py
@@ -4,6 +4,7 @@
 from language_utilities.constant import ENGLISH_LANG
 import re
 import phonenumbers
+from six.moves import zip
 
 
 class PhoneDetector(BaseDetector):

diff --git a/ner_v2/detectors/temporal/date/date_detection.py b/ner_v2/detectors/temporal/date/date_detection.py
@@ -7,7 +7,8 @@
 import os
 import re
 
-import six
+from six.moves import range
+from six.moves import zip
 
 import models.crf.constant as model_constant
 import ner_v2.detectors.temporal.constant as temporal_constant
@@ -230,7 +231,7 @@ def _detect_range(self):
                 parts = re.split(r'\s+(?:\-|to|till|se)\s+', sentence_part)
                 skip_next_pair = False
                 _day_of_week_types = [temporal_constant.TYPE_THIS_DAY, temporal_constant.TYPE_NEXT_DAY]
-                for start_part, end_part in six.moves.zip(parts, parts[1:]):
+                for start_part, end_part in zip(parts, parts[1:]):
                     if skip_next_pair:
                         skip_next_pair = False
                         continue

diff --git a/ner_v2/detectors/temporal/date/en/date_detection.py b/ner_v2/detectors/temporal/date/en/date_detection.py
@@ -9,6 +9,7 @@
                                                 MONTH_DICT, DAY_DICT, ORDINALS_MAP)
 from ner_v2.detectors.temporal.utils import (get_weekdays_for_month, get_next_date_with_dd, get_previous_date_with_dd,
                                              get_timezone)
+from six.moves import zip
 
 
 class DateDetector(object):
@@ -1651,7 +1652,7 @@ def _day_range_for_nth_week_month(self, date_list=None, original_list=None):
             original_list = []
         if date_list is None:
             date_list = []
-        ordinal_choices = "|".join(ORDINALS_MAP.keys())
+        ordinal_choices = "|".join(list(ORDINALS_MAP.keys()))
         regex_pattern = re.compile(r'((' + ordinal_choices + r')\s+week\s+(of\s+)?([A-Za-z]+)(?:\s+month)?)\s+')
         patterns = regex_pattern.findall(self.processed_text.lower())
         for pattern in patterns:

diff --git a/ner_v2/detectors/temporal/time/en/time_detection.py b/ner_v2/detectors/temporal/time/en/time_detection.py
@@ -95,7 +95,7 @@ def __init__(self, entity_name, timezone=None):
 
         self.init_regex_and_parser(os.path.join((os.path.dirname(os.path.abspath(__file__)).rstrip(os.sep)),
                                                 LANGUAGE_DATA_DIRECTORY))
-        sorted_len_timezone_keys = sorted(self.timezones_map.keys(), key=len, reverse=True)
+        sorted_len_timezone_keys = sorted(list(self.timezones_map.keys()), key=len, reverse=True)
         self.timezone_choices = "|".join([re.escape(x.lower()) for x in sorted_len_timezone_keys])
 
     def set_bot_message(self, bot_message):
@@ -1738,7 +1738,7 @@ def _remove_time_range_entities(self, time_list, original_list):
         time_list_final = []
         original_list_final = []
         for i, entity in enumerate(time_list):
-            if 'range' not in entity.keys():
+            if 'range' not in entity:
                 time_list_final.append(entity)
                 original_list_final.append(original_list[i])
             elif not entity['range']:

diff --git a/ner_v2/detectors/temporal/utils.py b/ner_v2/detectors/temporal/utils.py
@@ -7,7 +7,7 @@
 
 from chatbot_ner.config import ner_logger
 from ner_v2.detectors.temporal.constant import POSITIVE_TIME_DIFF, NEGATIVE_TIME_DIFF, CONSTANT_FILE_KEY
-
+from six.moves import range
 
 def nth_weekday(weekday, n, ref_date):
     """