Skip to content

Commit

Permalink
Merge pull request #343 from hellohaptik/develop
Browse files Browse the repository at this point in the history
Develop to Master
  • Loading branch information
chiragjn authored Feb 20, 2020
2 parents aec491a + 9ec72ba commit da12170
Show file tree
Hide file tree
Showing 28 changed files with 102 additions and 31 deletions.
2 changes: 1 addition & 1 deletion datastore/elastic_search/create.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from utils import filter_kwargs
from .utils import filter_kwargs

log_prefix = 'datastore.elastic_search.create'

Expand Down
3 changes: 2 additions & 1 deletion datastore/elastic_search/populate.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from external_api.constants import SENTENCE, ENTITIES
from language_utilities.constant import ENGLISH_LANG
from ner_constants import DICTIONARY_DATA_VARIANTS
from six.moves import map

# Local imports

Expand Down Expand Up @@ -106,7 +107,7 @@ def get_variants_dictionary_value_from_key(csv_file_path, dictionary_key, logger
next(csv_reader)
for data_row in csv_reader:
try:
data = map(str.strip, data_row[1].split('|'))
data = list(map(str.strip, data_row[1].split('|')))
# remove empty strings
data = [variant for variant in data if variant]
dictionary_value[data_row[0].strip().replace('.', ' ')].extend(data)
Expand Down
2 changes: 2 additions & 0 deletions datastore/elastic_search/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
from external_api.constants import SENTENCE, ENTITIES
from language_utilities.constant import ENGLISH_LANG
from lib.nlp.const import TOKENIZER
from six.moves import range
from six.moves import zip

# Local imports

Expand Down
4 changes: 2 additions & 2 deletions datastore/elastic_search/transfer.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ def _scroll_over_es_return_object(self, results):
total_records = results['hits']['total']
for post in results['hits']['hits']:
data_new.append(post)
if '_scroll_id' in results.keys():
if '_scroll_id' in results:
scroll_size = len(results['hits']['hits'])
while (scroll_size > 0):
scroll_id = results['_scroll_id']
Expand Down Expand Up @@ -411,7 +411,7 @@ def fetch_index_alias_points_to(self, es_url, alias_name):
response = requests.get(es_url + '/*/_alias/' + alias_name)
if response.status_code == 200:
json_obj = json.loads(response.content)
indices = json_obj.keys()
indices = list(json_obj.keys())
if self.es_index_1 in indices:
return self.es_index_1
elif self.es_index_2 in indices:
Expand Down
44 changes: 44 additions & 0 deletions docker/Dockerfile-python3
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# Automates the chatbot_ner installation on a Python 3.6 base image:
# system packages, the project's install script, uwsgi and the Python requirements.

FROM python:3.6.10

# DEBIAN_FRONTEND is set inline so apt never prompts during the build
# (the ENV further down is declared too late to affect this step).
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y wget build-essential curl nginx supervisor

WORKDIR /app


# Copy only what the install step needs first, so this layer stays cached
# when the rest of the source tree changes.
COPY docker/install.sh initial_setup.py /app/
COPY docker/supervisord.conf /etc/supervisor/conf.d/supervisord.conf

# cython is installed because pandas build fails otherwise
RUN mkdir -p ~/model_lib && \
    mkdir -p /root/models && \
    /app/install.sh && \
    touch /app/config && \
    touch /app/model_config && \
    pip install --no-cache-dir -I uwsgi && \
    pip install --no-cache-dir cython

# requirements.txt is copied on its own so dependency installation is only
# re-run when the requirements change, not on every source change.
COPY requirements.txt /app/requirements.txt

RUN pip install --no-cache-dir -r /app/requirements.txt

# From start_server.sh

ENV NAME="chatbot_ner"
ENV DJANGODIR=/app
ENV NUM_WORKERS=4
ENV DJANGO_SETTINGS_MODULE=chatbot_ner.settings
ENV PORT=8081
ENV TIMEOUT=600
ENV DEBIAN_FRONTEND=noninteractive

# IMPORTANT: this is only a placeholder default that is committed to the repository.
# Override it via .env (the file copied from config.example) in every real deployment.
ENV SECRET_KEY=!yqqcz-v@(s@kpygpvomcuu3il0q1&qtpz)e_g0ulo-sdv%c0c

EXPOSE 8081

# COPY is used instead of ADD: this is a plain local copy and needs none of
# ADD's archive-extraction or URL-fetching behaviour.
COPY . /app

# entrypoint/cmd script
CMD /app/docker/cmd.sh
4 changes: 3 additions & 1 deletion lib/nlp/levenshtein_distance.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from six.moves import range

def edit_distance(string1, string2, insertion_cost=1, deletion_cost=1, substitution_cost=2, max_distance=None):
"""
Calculate the weighted levenshtein distance between two strings
Expand Down Expand Up @@ -30,7 +32,7 @@ def edit_distance(string1, string2, insertion_cost=1, deletion_cost=1, substitut

if len(string1) > len(string2):
string1, string2 = string2, string1
distances = range(len(string1) + 1)
distances = list(range(len(string1) + 1))
for index2, char2 in enumerate(string2):
new_distances = [index2 + 1]
for index1, char1 in enumerate(string1):
Expand Down
1 change: 1 addition & 0 deletions models/crf/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from .constant import INBOUND, OUTBOUND
from .output_generation.city import generate_city_output
from .output_generation.date import generate_date_output
from six.moves import range

try:
import CRFPP
Expand Down
1 change: 1 addition & 0 deletions models/crf_v2/crf_detect_entity.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from .get_crf_tagger import CrfModel
from chatbot_ner.config import CRF_MODELS_PATH
from models.crf_v2.constants import CRF_B_LABEL, CRF_I_LABEL
from six.moves import range


class CrfDetection(object):
Expand Down
2 changes: 2 additions & 0 deletions models/crf_v2/crf_preprocess_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
from chatbot_ner.config import ner_logger
from models.crf_v2.constants import SENTENCE_LIST, CRF_WORD_EMBEDDINGS, CRF_WORD_VEC_FEATURE, CRF_B_LABEL,\
CRF_B_TAG, CRF_I_LABEL, CRF_I_TAG, CRF_POS_TAGS, CRF_LABELS, CRF_O_LABEL, CRF_BOS, CRF_EOS
from six.moves import range
from six.moves import zip


class CrfPreprocessData(object):
Expand Down
1 change: 1 addition & 0 deletions models/crf_v2/crf_train.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
ESCrfTrainingTextListNotFoundException
from datetime import datetime
import os
from six.moves import zip


class CrfTrain(object):
Expand Down
2 changes: 1 addition & 1 deletion ner_v1/chatbot/combine_detection_logic.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ def combine_output_of_detection_logic_and_tag(entity_data, text):
else:
final_entity_data[entity] = None

original_text_list = tag_preprocess_dict.keys()
original_text_list = list(tag_preprocess_dict.keys())
original_text_list = sort_original_text(original_text_list)
for original_text in original_text_list:
tag = ''
Expand Down
1 change: 1 addition & 0 deletions ner_v1/detectors/base_detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
FROM_FALLBACK_VALUE, ORIGINAL_TEXT, ENTITY_VALUE, DETECTION_METHOD,
DETECTION_LANGUAGE, ENTITY_VALUE_DICT_KEY)
from ner_v1.constant import DATASTORE_VERIFIED, MODEL_VERIFIED
from six.moves import range

try:
import regex as re
Expand Down
5 changes: 3 additions & 2 deletions ner_v1/detectors/numeral/budget/budget_detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from ner_v1.constant import BUDGET_TYPE_NORMAL, BUDGET_TYPE_TEXT
from ner_v1.detectors.base_detector import BaseDetector
from ner_v1.detectors.textual.text.text_detection import TextDetector
from six.moves import zip


class BudgetDetector(BaseDetector):
Expand Down Expand Up @@ -119,9 +120,9 @@ def __init__(self, entity_name, source_language_script=ENGLISH_LANG, translation
self.tag = '__' + self.entity_name + '__'
self._use_text_detection = use_text_detection

units, scales = zip(*sorted(
units, scales = list(zip(*sorted(
list(BudgetDetector._scale_patterns.items()), key=lambda pattern_scale: len(pattern_scale[0]), reverse=True
))
)))
self._scale_compiled_patterns = [(scale, re.compile(unit)) for scale, unit in zip(scales, units)]
digits_pattern = r'((?:\d+(?:\,\d+)*(?:\.\d+)?)|(?:(?:\d+(?:\,\d+)*)?(?:\.\d+)))'
units_pattern = r'({})?'.format('|'.join(units))
Expand Down
2 changes: 2 additions & 0 deletions ner_v1/detectors/temporal/date/date_detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
TYPE_THIS_DAY, TYPE_PAST,
TYPE_POSSIBLE_DAY, TYPE_REPEAT_DAY, WEEKDAYS, WEEKENDS, REPEAT_WEEKDAYS,
REPEAT_WEEKENDS, MONTH_DICT, DAY_DICT, TYPE_N_DAYS_AFTER)
from six.moves import range
from six.moves import zip


class DateAdvancedDetector(object):
Expand Down
2 changes: 1 addition & 1 deletion ner_v1/detectors/temporal/time/time_detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -1459,7 +1459,7 @@ def _remove_time_range_entities(self, time_list, original_list):
time_list_final = []
original_list_final = []
for i, entity in enumerate(time_list):
if 'range' not in entity.keys():
if 'range' not in entity:
time_list_final.append(entity)
original_list_final.append(original_list[i])
elif not entity['range']:
Expand Down
7 changes: 4 additions & 3 deletions ner_v1/detectors/textual/name/name_detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
HINDI_STOPWORDS, NAME_VARIATIONS,
COMMON_HINDI_WORDS_OCCURING_WITH_NAME)
from ner_v1.detectors.textual.text.text_detection import TextDetector
from six.moves import range


# TODO: Refactor this module for readability and usability. Remove any hacks
Expand Down Expand Up @@ -110,7 +111,7 @@ def get_name_using_pos_tagger(self, text):

entity_value, original_text = [], []
pos_tagger_object = POS()
pattern1 = re.compile(r"name\s*(is|)\s*([\w\s]+)")
pattern1 = re.compile(r"name\s+(?:is\s+)?([\w\s]+)")
pattern2 = re.compile(r"myself\s+([\w\s]+)")
pattern3 = re.compile(r"call\s+me\s+([\w\s]+)")
pattern4 = re.compile(r"i\s+am\s+([\w\s]+)")
Expand All @@ -128,7 +129,7 @@ def get_name_using_pos_tagger(self, text):
return entity_value, original_text

if pattern1_match:
entity_value, original_text = self.get_format_name(pattern1_match[0][1].split(), self.text)
entity_value, original_text = self.get_format_name(pattern1_match[0].split(), self.text)

elif pattern2_match:
entity_value, original_text = self.get_format_name(pattern2_match[0].split(), self.text)
Expand Down Expand Up @@ -521,7 +522,7 @@ def remove_emojis(self, text):
Returns:
text (str): text with emojis replaced with ''
"""
emoji_pattern = re.compile(ur'[{0}]+'.format(''.join(EMOJI_RANGES.values())), re.UNICODE)
emoji_pattern = re.compile(ur'[{0}]+'.format(''.join(list(EMOJI_RANGES.values()))), re.UNICODE)
text = emoji_pattern.sub(repl='', string=text)
return text

Expand Down
4 changes: 3 additions & 1 deletion ner_v1/detectors/textual/name/tests/test_name_detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@

from ner_v1.constant import DATASTORE_VERIFIED, MODEL_VERIFIED
from ner_v1.detectors.textual.name.name_detection import NameDetector
from six.moves import range
from six.moves import zip


class NameDetectionTest(TestCase):
Expand Down Expand Up @@ -65,7 +67,7 @@ def test_person_name_detection(self):
for d in detected_texts:
d.pop(MODEL_VERIFIED)
d.pop(DATASTORE_VERIFIED)
zipped = zip(detected_texts, original_texts)
zipped = list(zip(detected_texts, original_texts))
self.assertEqual(expected_value, zipped)

def generate_person_name_dict(self, person_name_dict):
Expand Down
3 changes: 2 additions & 1 deletion ner_v1/detectors/textual/text/text_detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from lib.nlp.levenshtein_distance import edit_distance
from ner_v1.detectors.base_detector import BaseDetector
from ner_constants import ENTITY_VALUE_DICT_KEY
from six.moves import range

try:
import regex as re
Expand Down Expand Up @@ -445,7 +446,7 @@ def _text_detection_with_variants(self):
variant = variant.decode('utf-8')

variants_to_values[variant] = value
variants_list = variants_to_values.keys()
variants_list = list(variants_to_values.keys())

# Length based ordering, this reorders the results from datastore
# that are already sorted by some relevance scoring
Expand Down
1 change: 1 addition & 0 deletions ner_v2/detectors/numeral/number/number_detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from ner_v2.detectors.base_detector import BaseDetector
from ner_v2.detectors.numeral.constant import NUMBER_DETECTION_RETURN_DICT_VALUE, NUMBER_DETECTION_RETURN_DICT_UNIT
from ner_v2.detectors.utils import get_lang_data_path
from six.moves import zip


class NumberDetector(BaseDetector):
Expand Down
5 changes: 3 additions & 2 deletions ner_v2/detectors/numeral/number/standard_number_detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import pandas as pd
import collections
import os
from six.moves import zip

try:
import regex as re
Expand Down Expand Up @@ -51,10 +52,10 @@ def __init__(self, entity_name, data_directory_path, unit_type=None):
# Method to initialise value in regex
self.init_regex_and_parser(data_directory_path)

sorted_len_units_keys = sorted(self.units_map.keys(), key=len, reverse=True)
sorted_len_units_keys = sorted(list(self.units_map.keys()), key=len, reverse=True)
self.unit_choices = "|".join([re.escape(x) for x in sorted_len_units_keys])

sorted_len_scale_map = sorted(self.scale_map.keys(), key=len, reverse=True)
sorted_len_scale_map = sorted(list(self.scale_map.keys()), key=len, reverse=True)
# using re.escape for strict matches in case pattern comes with '.' or '*', which should be escaped
self.scale_map_choices = "|".join([re.escape(x) for x in sorted_len_scale_map])

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
import ner_v2.detectors.numeral.constant as numeral_constant
from ner_v2.detectors.numeral.utils import get_list_from_pipe_sep_string
from ner_v2.detectors.numeral.number.number_detection import NumberDetector
from six.moves import zip

try:
import regex as re
_re_flags = re.UNICODE | re.V1 | re.WORD
Expand Down Expand Up @@ -126,7 +128,8 @@ def _tag_number_in_text(self, processed_text):
i want to buy __number__1 apples and more than __number__0 bananas
"""
tagged_number_text = processed_text
sorted_number_detected_map = sorted(self.number_detected_map.items(), key=lambda kv: len(kv[1].original_text),
sorted_number_detected_map = sorted(list(self.number_detected_map.items()),
key=lambda kv: len(kv[1].original_text),
reverse=True)
for number_tag in sorted_number_detected_map:
tagged_number_text = tagged_number_text.replace(number_tag[1].original_text, number_tag[0], 1)
Expand Down
2 changes: 1 addition & 1 deletion ner_v2/detectors/numeral/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import re

from six.moves import range

def get_number_from_number_word(text, number_word_dict):
"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from language_utilities.constant import ENGLISH_LANG
import re
import phonenumbers
from six.moves import zip


class PhoneDetector(BaseDetector):
Expand Down
5 changes: 3 additions & 2 deletions ner_v2/detectors/temporal/date/date_detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
import os
import re

import six
from six.moves import range
from six.moves import zip

import models.crf.constant as model_constant
import ner_v2.detectors.temporal.constant as temporal_constant
Expand Down Expand Up @@ -230,7 +231,7 @@ def _detect_range(self):
parts = re.split(r'\s+(?:\-|to|till|se)\s+', sentence_part)
skip_next_pair = False
_day_of_week_types = [temporal_constant.TYPE_THIS_DAY, temporal_constant.TYPE_NEXT_DAY]
for start_part, end_part in six.moves.zip(parts, parts[1:]):
for start_part, end_part in zip(parts, parts[1:]):
if skip_next_pair:
skip_next_pair = False
continue
Expand Down
3 changes: 2 additions & 1 deletion ner_v2/detectors/temporal/date/en/date_detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
MONTH_DICT, DAY_DICT, ORDINALS_MAP)
from ner_v2.detectors.temporal.utils import (get_weekdays_for_month, get_next_date_with_dd, get_previous_date_with_dd,
get_timezone)
from six.moves import zip


class DateDetector(object):
Expand Down Expand Up @@ -1651,7 +1652,7 @@ def _day_range_for_nth_week_month(self, date_list=None, original_list=None):
original_list = []
if date_list is None:
date_list = []
ordinal_choices = "|".join(ORDINALS_MAP.keys())
ordinal_choices = "|".join(list(ORDINALS_MAP.keys()))
regex_pattern = re.compile(r'((' + ordinal_choices + r')\s+week\s+(of\s+)?([A-Za-z]+)(?:\s+month)?)\s+')
patterns = regex_pattern.findall(self.processed_text.lower())
for pattern in patterns:
Expand Down
4 changes: 2 additions & 2 deletions ner_v2/detectors/temporal/time/en/time_detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ def __init__(self, entity_name, timezone=None):

self.init_regex_and_parser(os.path.join((os.path.dirname(os.path.abspath(__file__)).rstrip(os.sep)),
LANGUAGE_DATA_DIRECTORY))
sorted_len_timezone_keys = sorted(self.timezones_map.keys(), key=len, reverse=True)
sorted_len_timezone_keys = sorted(list(self.timezones_map.keys()), key=len, reverse=True)
self.timezone_choices = "|".join([re.escape(x.lower()) for x in sorted_len_timezone_keys])

def set_bot_message(self, bot_message):
Expand Down Expand Up @@ -1738,7 +1738,7 @@ def _remove_time_range_entities(self, time_list, original_list):
time_list_final = []
original_list_final = []
for i, entity in enumerate(time_list):
if 'range' not in entity.keys():
if 'range' not in entity:
time_list_final.append(entity)
original_list_final.append(original_list[i])
elif not entity['range']:
Expand Down
2 changes: 1 addition & 1 deletion ner_v2/detectors/temporal/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

from chatbot_ner.config import ner_logger
from ner_v2.detectors.temporal.constant import POSITIVE_TIME_DIFF, NEGATIVE_TIME_DIFF, CONSTANT_FILE_KEY

from six.moves import range

def nth_weekday(weekday, n, ref_date):
"""
Expand Down
Loading

0 comments on commit da12170

Please sign in to comment.