Merge pull request #417 from hellohaptik/develop
Develop to Master 2021-03-04T13:57:00
chiragjn authored Mar 5, 2021
2 parents 4ca5de4 + a4a6905 commit 44b78ec
Showing 30 changed files with 982 additions and 919 deletions.
41 changes: 41 additions & 0 deletions .coveragerc
@@ -0,0 +1,41 @@
[run]
source = .
omit =
*.pyc
*.pyo
*/site-packages/*
*/distutils/*
docs/
docker/
logs/
postman_tests/
*/tests/*
*/test.py
*/tests.py
manage.py
*/settings.py
*/urls.py
*/migrations/*
*wsgi.py
*__init__.py

[report]
skip_empty = True
sort = Cover
exclude_lines =
pragma: no cover

# Don't complain about missing debug-only code:
def __repr__
if self\.debug

# Don't complain if tests don't hit defensive assertion code:
raise AssertionError
raise NotImplementedError

# Don't complain if non-runnable code isn't run:
if 0:
if __name__ == .__main__.:

__author__ = 'haptik'
show_missing = True
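
For reference, a minimal sketch (not part of this commit) of the kind of code the exclude_lines patterns above keep out of the coverage report; the module, class and function names here are made up:

# example_module.py -- hypothetical, to illustrate the .coveragerc exclude rules
class Detector(object):
    def detect(self, text):
        if not text:  # pragma: no cover - explicitly excluded by the config
            return []
        return [text.strip()]

    def __repr__(self):
        # the "def __repr__" pattern excludes this whole method from the report
        return 'Detector()'

    def bulk_detect(self, texts):
        # defensive stub; "raise NotImplementedError" lines are excluded
        raise NotImplementedError


if __name__ == '__main__':  # matched by the "if __name__ == .__main__.:" regex
    print(Detector().detect('yash doshi'))
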
4 changes: 2 additions & 2 deletions .github/release-drafter.yml
@@ -19,5 +19,5 @@ categories:
label: packages-updated
- title: 👺 Miscellaneous
label: miscellaneous
exclude-labels:
- miscellaneous
# exclude-labels:
# - miscellaneous
2 changes: 1 addition & 1 deletion .gitignore
@@ -103,7 +103,7 @@ ENV/
/newrelic.ini
sftp-config.json
.DS_Store
logs/
logs/*.log*

.vscode
newman_reports/
12 changes: 12 additions & 0 deletions .whitesource
@@ -0,0 +1,12 @@
{
"scanSettings": {
"baseBranches": []
},
"checkRunSettings": {
"vulnerableCheckRunConclusionLevel": "failure",
"displayMode": "diff"
},
"issueSettings": {
"minSeverityLevel": "LOW"
}
}
26 changes: 4 additions & 22 deletions chatbot_ner/config.py
@@ -13,14 +13,10 @@
LOG_PATH = os.path.join(BASE_DIR, 'logs')

# TODO: Set this up via Django LOGGING
# SET UP NER LOGGING
if not os.path.exists(LOG_PATH):
os.makedirs(LOG_PATH)

LOG_LEVEL = os.environ.get('DJANGO_LOG_LEVEL', 'error').upper()

# Common formatter
formatter = logging.Formatter("%(asctime)s\t%(levelname)s\t%(message)s", "%Y-%m-%d %H:%M:%S")
formatter = logging.Formatter("%(asctime)s %(levelname)s %(message)s %(module)s:%(lineno)d")

# Handler for Docker stdout
handler_stdout = logging.StreamHandler()
@@ -29,28 +25,14 @@

# SETUP NER LOGGING
NER_LOG_FILENAME = os.path.join(LOG_PATH, 'ner_log.log')
# Set up a specific logger with our desired output level
ner_logger = logging.getLogger('NERLogger')
ner_logger.setLevel(LOG_LEVEL)
# Add the log message handler to the logger
handler = logging.handlers.WatchedFileHandler(NER_LOG_FILENAME)
# handler = logging.handlers.RotatingFileHandler(NER_LOG_FILENAME, maxBytes=10 * 1024 * 1024, backupCount=5)
handler.setFormatter(formatter)

ner_logger = logging.getLogger('NERLogger')
ner_logger.setLevel(LOG_LEVEL)
ner_logger.addHandler(handler)
ner_logger.addHandler(handler_stdout)

# SETUP NLP LIB LOGGING
NLP_LIB_LOG_FILENAME = os.path.join(LOG_PATH, 'nlp_log.log')
# Set up a specific logger with our desired output level
nlp_logger = logging.getLogger('NLPLibLogger')
nlp_logger.setLevel(LOG_LEVEL)
# Add the log message handler to the logger
handler = logging.handlers.WatchedFileHandler(NLP_LIB_LOG_FILENAME)
# handler = logging.handlers.RotatingFileHandler(NLP_LIB_LOG_FILENAME, maxBytes=10 * 1024 * 1024, backupCount=5)
handler.setFormatter(formatter)
nlp_logger.addHandler(handler)
nlp_logger.addHandler(handler_stdout)

ENGINE = os.environ.get('ENGINE')
# ES settings (Mandatory to use Text type entities)
ES_SCHEME = os.environ.get('ES_SCHEME', 'http')
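
As a standalone sketch of the logging pattern config.py now uses (standard library only; the log file path and logger name below are placeholders, not the ones in the repository):

import logging
import logging.handlers
import os

LOG_LEVEL = os.environ.get('DJANGO_LOG_LEVEL', 'error').upper()

# Same shape as the new formatter: timestamp, level, message, then module:lineno
formatter = logging.Formatter("%(asctime)s %(levelname)s %(message)s %(module)s:%(lineno)d")

# WatchedFileHandler reopens the file if an external tool such as logrotate moves it,
# which is why it is used here instead of RotatingFileHandler.
file_handler = logging.handlers.WatchedFileHandler('example_ner.log')
file_handler.setFormatter(formatter)

stream_handler = logging.StreamHandler()  # keeps container (docker logs) output useful
stream_handler.setFormatter(formatter)

logger = logging.getLogger('ExampleLogger')
logger.setLevel(LOG_LEVEL)
logger.addHandler(file_handler)
logger.addHandler(stream_handler)

logger.error("datastore lookup failed for entity '%s'", 'restaurant')
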
40 changes: 31 additions & 9 deletions chatbot_ner/settings.py
@@ -10,12 +10,14 @@

# Build paths inside the project like this: os.path.join(BASE_DIR, ...)
from __future__ import absolute_import

import os
import sys

from chatbot_ner.setup_sentry import setup_sentry

BASE_DIR = os.path.dirname(os.path.dirname(__file__))
ENVIRONMENT = os.environ.get('ENVIRONMENT') or os.environ.get('HAPTIK_ENV')

# Quick-start development settings - unsuitable for production
# See https://docs.djangoproject.com/en/1.11/howto/deployment/checklist/
@@ -59,6 +61,28 @@
'django.middleware.clickjacking.XFrameOptionsMiddleware',
]

# APM
_elastic_apm_enabled = (os.environ.get('ELASTIC_APM_ENABLED') or '').strip().lower()
ELASTIC_APM_ENABLED = (_elastic_apm_enabled == 'true') and 'test' not in sys.argv
ELASTIC_APM_SERVER_URL = os.environ.get('ELASTIC_APM_SERVER_URL')
if ELASTIC_APM_ENABLED:
ELASTIC_APM = {
'DEBUG': DEBUG,
'SERVICE_NAME': 'chatbot_ner',
'SERVER_URL': ELASTIC_APM_SERVER_URL,
'SPAN_FRAMES_MIN_DURATION': '5ms',
'STACK_TRACE_LIMIT': 500,
'ENVIRONMENT': ENVIRONMENT,
'TRANSACTION_SAMPLE_RATE': '0.1',
'TRANSACTION_MAX_SPANS': 500,
'INSTRUMENT': 'True',
'DISABLE_SEND': 'False',
'CAPTURE_BODY': 'off',
'SERVER_TIMEOUT': '2s',
}
INSTALLED_APPS.append('elasticapm.contrib.django')
MIDDLEWARE.append('elasticapm.contrib.django.middleware.TracingMiddleware')

ROOT_URLCONF = 'chatbot_ner.urls'

WSGI_APPLICATION = 'chatbot_ner.wsgi.application'
@@ -96,27 +120,25 @@ def __getitem__(self, item):
'CONN_MAX_AGE': 60
}

# MIGRATION_MODULES = DisableMigrations()


TEST_RUNNER = 'django_nose.NoseTestSuiteRunner'
NOSE_ARGS = [
'--nocapture',
'--nologcapture',
'--verbosity=3',
'--ignore-files=urls.py',
'--ignore-files=wsgi.py',
'--exclude-dir=chatbot_ner/',
'--exclude-dir=docs/',
'--exclude-dir=docker/',
'--exclude-dir=data/',
'--ignore-files=manage.py',
'--ignore-files=nltk_setup.py',
'--ignore-files=__init__.py',
'--ignore-files=const.py',
'--ignore-files=constant.py',
'--ignore-files=constants.py',
'--ignore-files=settings.py',
'--ignore-files=run_postman_tests.py',
'--exclude-dir=docs/',
'--exclude-dir=docker/',
'--exclude-dir=data/',
'--cover-erase',
'--cover-package=datastore,external_api,language_utilities,lib,models,ner_v1,ner_v2',
'--cover-inclusive',
]

# Internationalization
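
The APM switch uses the same defensive environment-variable parsing that the setup_sentry.py change below adopts. A minimal sketch of that pattern as a reusable helper; the env_flag name is illustrative and not something this commit adds:

import os
import sys


def env_flag(name, default=False):
    # Missing/None-safe, whitespace-tolerant, case-insensitive boolean env var.
    raw = (os.environ.get(name) or '').strip().lower()
    if not raw:
        return default
    return raw == 'true'


# Mirrors the gate in settings.py: APM is on only when the flag is truthy
# and the process is not running the test suite.
ELASTIC_APM_ENABLED = env_flag('ELASTIC_APM_ENABLED') and 'test' not in sys.argv
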
4 changes: 2 additions & 2 deletions chatbot_ner/setup_sentry.py
@@ -9,8 +9,8 @@

# Support for Sentry DSN
SENTRY_DSN = os.environ.get('SENTRY_DSN')
SENTRY_ENABLED = os.environ.get('SENTRY_ENABLED')
SENTRY_ENABLED = True if SENTRY_ENABLED == 'True' and 'test' not in sys.argv else False
_sentry_enabled = (os.environ.get('SENTRY_ENABLED') or '').strip().lower()
SENTRY_ENABLED = (_sentry_enabled == 'true' and 'test' not in sys.argv)


def setup_sentry():
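
To make the behavioural change concrete, a small illustration (sample values only) of inputs the old exact-match check rejected but the new normalised check accepts:

def old_check(value):
    # Previous behaviour: only the exact string 'True' enabled Sentry.
    return True if value == 'True' else False


def new_check(value):
    # New behaviour: None-safe, whitespace-tolerant, case-insensitive.
    return (value or '').strip().lower() == 'true'


assert old_check('True') and new_check('True')
assert not old_check('true') and new_check('true')      # casing no longer matters
assert not old_check(' TRUE ') and new_check(' TRUE ')  # stray whitespace tolerated
assert not old_check(None) and not new_check(None)      # unset variable stays disabled
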
Empty file added logs/.gitkeep
Empty file.
43 changes: 23 additions & 20 deletions ner_v1/detectors/textual/name/name_detection.py
@@ -9,14 +9,13 @@
from language_utilities.constant import (ENGLISH_LANG, INDIC_LANGUAGES_SET, EUROPEAN_LANGUAGES_SET)
from ner_v1.constant import DATASTORE_VERIFIED, MODEL_VERIFIED
from ner_v1.constant import EMOJI_RANGES, FIRST_NAME, MIDDLE_NAME, LAST_NAME
from ner_v1.detectors.textual.name.hindi_const import (INDIC_BADWORDS, INDIC_QUESTIONWORDS,
INDIC_STOPWORDS, NAME_VARIATIONS, INDIC_UNICODE_RANGE,
COMMON_INDIC_WORDS_OCCURRING_WITH_NAME)
from ner_v1.detectors.textual.name.lang_constants import (INDIC_BADWORDS, INDIC_QUESTIONWORDS,
INDIC_STOPWORDS, NAME_VARIATIONS, INDIC_UNICODE_RANGE,
COMMON_INDIC_WORDS_OCCURRING_WITH_NAME)
from six.moves import range


# TODO: Refactor this module for readability and usability. Remove any hacks
# TODO: Make this module python 3 compatible

class NameDetector(object):
"""
Expand Down Expand Up @@ -62,21 +61,24 @@ def get_format_name(name_tokens, text):
2.The original text.
Args:
name_tokens (list): List of tokens in the name
Example:
['yash', 'doshi']
name_tokens (list): List of tokens in the name. e.g. ['yash', 'doshi']
Returns:
(
[{first_name: "yash", middle_name: None, last_name: "doshi"}],
["yash modi"]
)
(list, list): tuple containing
list: list of dictionaries, one for each detected name
list: list of str, the original text span for each detected name
Examples:
>>> NameDetector.get_format_name(['yash', 'p.', 'm.', 'doshi'], 'my name is yash p. m. doshi')
([{first_name: 'yash', middle_name: 'p. m.', last_name: 'doshi'}],
['yash p. m. doshi'])
"""
entity_value = []
original_text = []
if not name_tokens:
return entity_value, original_text

name_text = " ".join(name_tokens)

first_name = name_tokens[0]
middle_name = None
last_name = None
@@ -166,7 +168,7 @@ def detect_entity(self, text, bot_message=None, predetected_values=None, **kwarg
if self.language in EUROPEAN_LANGUAGES_SET | {ENGLISH_LANG}:
entity_value, original_text = self.detect_english_name()
elif self.language in INDIC_LANGUAGES_SET:
entity_value, original_text = self.detect_hindi_name()
entity_value, original_text = self.detect_indic_name()

for entity_value_dict in entity_value:
entity_value_dict.update({DATASTORE_VERIFIED: True, MODEL_VERIFIED: False})
@@ -201,7 +203,7 @@ def detect_english_name(self, text=None):
entity_value, original_text = self.get_name_using_pos_tagger(text)
return entity_value, original_text

def detect_hindi_name(self):
def detect_indic_name(self):
"""
This method is used to detect Hindi names from the provided text
@@ -216,15 +218,15 @@ def detect_hindi_name(self):
>> [{first_name: u"प्रतिक", middle_name: u"श्रीदत्त", last_name: u"जयराओ"}], [ u'प्रतिक श्रीदत्त जयराओ']
"""
if self.detect_abusive_phrases_hindi(text=self.text) or self.detect_question_hindi(text=self.text):
if self.detect_abusive_phrases_indic(text=self.text) or self.detect_question_indic(text=self.text):
return [], []

text = self.remove_emojis(text=self.text)
text_before_hindi_regex_operations = text
regex = re.compile(u'[^{unicode_range}\\s]+'.format(unicode_range=INDIC_UNICODE_RANGE[self.language]), re.U)
text = regex.sub(string=text, repl='')

entity_value, original_text = self.get_hindi_names_without_regex(text=text)
entity_value, original_text = self.get_indic_names_without_regex(text=text)
# Further check for name, if it might have been written in latin script.
if not entity_value:
english_present_regex = re.compile(u'[a-zA-Z]+', re.U)
@@ -364,6 +366,7 @@ def detect_person_name_entity(self, replaced_text):
def context_check_botmessage(self, botmessage):
"""
Checks if previous botmessage contains name as a keyword or not
Args:
botmessage: it consists of the previous botmessage
@@ -377,12 +380,12 @@ def context_check_botmessage(self, botmessage):
botmessage = regex_pattern.sub(r'', botmessage)

botmessage = " " + botmessage.lower().strip() + " "
for variant in NAME_VARIATIONS[self.language]:
for variant in NAME_VARIATIONS.get(self.language, []):
if " " + variant + " " in botmessage:
return True
return False

def get_hindi_names_without_regex(self, text):
def get_indic_names_without_regex(self, text):
"""
This method is used to detect hindi names without any regex pattern (this method is called only if
detection from regex patterns fails)
@@ -430,7 +433,7 @@ def replace_stopwords_hindi(self, text):

return ""

def detect_abusive_phrases_hindi(self, text):
def detect_abusive_phrases_indic(self, text):
"""
This method is used to check for hindi abuses in the sentence
Args:
@@ -457,7 +460,7 @@ def remove_emojis(self, text):
text = emoji_pattern.sub(repl='', string=text)
return text

def detect_question_hindi(self, text):
def detect_question_indic(self, text):
"""
This method is used to detect if the given text has a hindi question present in it
Args:
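
One of the quieter fixes in this file is the switch from NAME_VARIATIONS[self.language] to NAME_VARIATIONS.get(self.language, []). A small sketch with made-up data (not the real lang_constants mapping) of why that matters for a language with no configured variations:

NAME_VARIATIONS = {
    'en': ['name', 'call me'],
    'hi': [u'नाम'],
}

language = 'mr'  # no variations configured in this toy mapping

# Old pattern: a plain lookup raises KeyError and aborts the whole check.
try:
    variants = NAME_VARIATIONS[language]
except KeyError:
    variants = None

# New pattern: falls back to an empty list, so the loop simply does nothing
# and context_check_botmessage returns False for unsupported languages.
for variant in NAME_VARIATIONS.get(language, []):
    print(variant)  # never reached for 'mr'
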
