Merge pull request #417 from hellohaptik/develop
Develop to Master 2021-03-04T13:57:00
chiragjn authored Mar 5, 2021
2 parents 4ca5de4 + a4a6905 commit 44b78ec
Showing 30 changed files with 982 additions and 919 deletions.
41 changes: 41 additions & 0 deletions .coveragerc
@@ -0,0 +1,41 @@
[run]
source = .
omit =
*.pyc
*.pyo
*/site-packages/*
*/distutils/*
docs/
docker/
logs/
postman_tests/
*/tests/*
*/test.py
*/tests.py
manage.py
*/settings.py
*/urls.py
*/migrations/*
*wsgi.py
*__init__.py

[report]
skip_empty = True
sort = Cover
exclude_lines =
pragma: no cover

# Don't complain about missing debug-only code:
def __repr__
if self\.debug

# Don't complain if tests don't hit defensive assertion code:
raise AssertionError
raise NotImplementedError

# Don't complain if non-runnable code isn't run:
if 0:
if __name__ == .__main__.:

__author__ = 'haptik'
show_missing = True
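
For reference, a minimal sketch (not part of this commit) of the kind of code the exclude_lines patterns above keep out of the coverage report; the module, class and function names here are made up:

# example_module.py -- hypothetical, to illustrate the .coveragerc exclude rules
class Detector(object):
    def detect(self, text):
        if not text:  # pragma: no cover - explicitly excluded by the config
            return []
        return [text.strip()]

    def __repr__(self):
        # the "def __repr__" pattern excludes this whole method from the report
        return 'Detector()'

    def bulk_detect(self, texts):
        # defensive stub; "raise NotImplementedError" lines are excluded
        raise NotImplementedError


if __name__ == '__main__':  # matched by the "if __name__ == .__main__.:" regex
    print(Detector().detect('yash doshi'))
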
4 changes: 2 additions & 2 deletions .github/release-drafter.yml
@@ -19,5 +19,5 @@ categories:
label: packages-updated
- title: 👺 Miscellaneous
label: miscellaneous
exclude-labels:
- miscellaneous
# exclude-labels:
# - miscellaneous
2 changes: 1 addition & 1 deletion .gitignore
@@ -103,7 +103,7 @@ ENV/
/newrelic.ini
sftp-config.json
.DS_Store
logs/
logs/*.log*

.vscode
newman_reports/
12 changes: 12 additions & 0 deletions .whitesource
@@ -0,0 +1,12 @@
{
"scanSettings": {
"baseBranches": []
},
"checkRunSettings": {
"vulnerableCheckRunConclusionLevel": "failure",
"displayMode": "diff"
},
"issueSettings": {
"minSeverityLevel": "LOW"
}
}
26 changes: 4 additions & 22 deletions chatbot_ner/config.py
@@ -13,14 +13,10 @@
LOG_PATH = os.path.join(BASE_DIR, 'logs')

# TODO: Set this up via Django LOGGING
# SET UP NER LOGGING
if not os.path.exists(LOG_PATH):
os.makedirs(LOG_PATH)

LOG_LEVEL = os.environ.get('DJANGO_LOG_LEVEL', 'error').upper()

# Common formatter
formatter = logging.Formatter("%(asctime)s\t%(levelname)s\t%(message)s", "%Y-%m-%d %H:%M:%S")
formatter = logging.Formatter("%(asctime)s %(levelname)s %(message)s %(module)s:%(lineno)d")

# Handler for Docker stdout
handler_stdout = logging.StreamHandler()
@@ -29,28 +25,14 @@

# SETUP NER LOGGING
NER_LOG_FILENAME = os.path.join(LOG_PATH, 'ner_log.log')
# Set up a specific logger with our desired output level
ner_logger = logging.getLogger('NERLogger')
ner_logger.setLevel(LOG_LEVEL)
# Add the log message handler to the logger
handler = logging.handlers.WatchedFileHandler(NER_LOG_FILENAME)
# handler = logging.handlers.RotatingFileHandler(NER_LOG_FILENAME, maxBytes=10 * 1024 * 1024, backupCount=5)
handler.setFormatter(formatter)

ner_logger = logging.getLogger('NERLogger')
ner_logger.setLevel(LOG_LEVEL)
ner_logger.addHandler(handler)
ner_logger.addHandler(handler_stdout)

# SETUP NLP LIB LOGGING
NLP_LIB_LOG_FILENAME = os.path.join(LOG_PATH, 'nlp_log.log')
# Set up a specific logger with our desired output level
nlp_logger = logging.getLogger('NLPLibLogger')
nlp_logger.setLevel(LOG_LEVEL)
# Add the log message handler to the logger
handler = logging.handlers.WatchedFileHandler(NLP_LIB_LOG_FILENAME)
# handler = logging.handlers.RotatingFileHandler(NLP_LIB_LOG_FILENAME, maxBytes=10 * 1024 * 1024, backupCount=5)
handler.setFormatter(formatter)
nlp_logger.addHandler(handler)
nlp_logger.addHandler(handler_stdout)

ENGINE = os.environ.get('ENGINE')
# ES settings (Mandatory to use Text type entities)
ES_SCHEME = os.environ.get('ES_SCHEME', 'http')
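
As a standalone sketch of the logging pattern config.py now uses (standard library only; the log file path and logger name below are placeholders, not the ones in the repository):

import logging
import logging.handlers
import os

LOG_LEVEL = os.environ.get('DJANGO_LOG_LEVEL', 'error').upper()

# Same shape as the new formatter: timestamp, level, message, then module:lineno
formatter = logging.Formatter("%(asctime)s %(levelname)s %(message)s %(module)s:%(lineno)d")

# WatchedFileHandler reopens the file if an external tool such as logrotate moves it,
# which is why it is used here instead of RotatingFileHandler.
file_handler = logging.handlers.WatchedFileHandler('example_ner.log')
file_handler.setFormatter(formatter)

stream_handler = logging.StreamHandler()  # keeps container (docker logs) output useful
stream_handler.setFormatter(formatter)

logger = logging.getLogger('ExampleLogger')
logger.setLevel(LOG_LEVEL)
logger.addHandler(file_handler)
logger.addHandler(stream_handler)

logger.error("datastore lookup failed for entity '%s'", 'restaurant')
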
40 changes: 31 additions & 9 deletions chatbot_ner/settings.py
@@ -10,12 +10,14 @@

# Build paths inside the project like this: os.path.join(BASE_DIR, ...)
from __future__ import absolute_import

import os
import sys

from chatbot_ner.setup_sentry import setup_sentry

BASE_DIR = os.path.dirname(os.path.dirname(__file__))
ENVIRONMENT = os.environ.get('ENVIRONMENT') or os.environ.get('HAPTIK_ENV')

# Quick-start development settings - unsuitable for production
# See https://docs.djangoproject.com/en/1.11/howto/deployment/checklist/
@@ -59,6 +61,28 @@
'django.middleware.clickjacking.XFrameOptionsMiddleware',
]

# APM
_elastic_apm_enabled = (os.environ.get('ELASTIC_APM_ENABLED') or '').strip().lower()
ELASTIC_APM_ENABLED = (_elastic_apm_enabled == 'true') and 'test' not in sys.argv
ELASTIC_APM_SERVER_URL = os.environ.get('ELASTIC_APM_SERVER_URL')
if ELASTIC_APM_ENABLED:
ELASTIC_APM = {
'DEBUG': DEBUG,
'SERVICE_NAME': 'chatbot_ner',
'SERVER_URL': ELASTIC_APM_SERVER_URL,
'SPAN_FRAMES_MIN_DURATION': '5ms',
'STACK_TRACE_LIMIT': 500,
'ENVIRONMENT': ENVIRONMENT,
'TRANSACTION_SAMPLE_RATE': '0.1',
'TRANSACTION_MAX_SPANS': 500,
'INSTRUMENT': 'True',
'DISABLE_SEND': 'False',
'CAPTURE_BODY': 'off',
'SERVER_TIMEOUT': '2s',
}
INSTALLED_APPS.append('elasticapm.contrib.django')
MIDDLEWARE.append('elasticapm.contrib.django.middleware.TracingMiddleware')

ROOT_URLCONF = 'chatbot_ner.urls'

WSGI_APPLICATION = 'chatbot_ner.wsgi.application'
@@ -96,27 +120,25 @@ def __getitem__(self, item):
'CONN_MAX_AGE': 60
}

# MIGRATION_MODULES = DisableMigrations()


TEST_RUNNER = 'django_nose.NoseTestSuiteRunner'
NOSE_ARGS = [
'--nocapture',
'--nologcapture',
'--verbosity=3',
'--ignore-files=urls.py',
'--ignore-files=wsgi.py',
'--exclude-dir=chatbot_ner/',
'--exclude-dir=docs/',
'--exclude-dir=docker/',
'--exclude-dir=data/',
'--ignore-files=manage.py',
'--ignore-files=nltk_setup.py',
'--ignore-files=__init__.py',
'--ignore-files=const.py',
'--ignore-files=constant.py',
'--ignore-files=constants.py',
'--ignore-files=settings.py',
'--ignore-files=run_postman_tests.py',
'--exclude-dir=docs/',
'--exclude-dir=docker/',
'--exclude-dir=data/',
'--cover-erase',
'--cover-package=datastore,external_api,language_utilities,lib,models,ner_v1,ner_v2',
'--cover-inclusive',
]

# Internationalization
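
The APM switch uses the same defensive environment-variable parsing that the setup_sentry.py change below adopts. A minimal sketch of that pattern as a reusable helper; the env_flag name is illustrative and not something this commit adds:

import os
import sys


def env_flag(name, default=False):
    # Missing/None-safe, whitespace-tolerant, case-insensitive boolean env var.
    raw = (os.environ.get(name) or '').strip().lower()
    if not raw:
        return default
    return raw == 'true'


# Mirrors the gate in settings.py: APM is on only when the flag is truthy
# and the process is not running the test suite.
ELASTIC_APM_ENABLED = env_flag('ELASTIC_APM_ENABLED') and 'test' not in sys.argv
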
4 changes: 2 additions & 2 deletions chatbot_ner/setup_sentry.py
@@ -9,8 +9,8 @@

# Support for Sentry DSN
SENTRY_DSN = os.environ.get('SENTRY_DSN')
SENTRY_ENABLED = os.environ.get('SENTRY_ENABLED')
SENTRY_ENABLED = True if SENTRY_ENABLED == 'True' and 'test' not in sys.argv else False
_sentry_enabled = (os.environ.get('SENTRY_ENABLED') or '').strip().lower()
SENTRY_ENABLED = (_sentry_enabled == 'true' and 'test' not in sys.argv)


def setup_sentry():
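
To make the behavioural change concrete, a small illustration (sample values only) of inputs the old exact-match check rejected but the new normalised check accepts:

def old_check(value):
    # Previous behaviour: only the exact string 'True' enabled Sentry.
    return True if value == 'True' else False


def new_check(value):
    # New behaviour: None-safe, whitespace-tolerant, case-insensitive.
    return (value or '').strip().lower() == 'true'


assert old_check('True') and new_check('True')
assert not old_check('true') and new_check('true')      # casing no longer matters
assert not old_check(' TRUE ') and new_check(' TRUE ')  # stray whitespace tolerated
assert not old_check(None) and not new_check(None)      # unset variable stays disabled
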
Empty file added logs/.gitkeep
Empty file.
43 changes: 23 additions & 20 deletions ner_v1/detectors/textual/name/name_detection.py
@@ -9,14 +9,13 @@
from language_utilities.constant import (ENGLISH_LANG, INDIC_LANGUAGES_SET, EUROPEAN_LANGUAGES_SET)
from ner_v1.constant import DATASTORE_VERIFIED, MODEL_VERIFIED
from ner_v1.constant import EMOJI_RANGES, FIRST_NAME, MIDDLE_NAME, LAST_NAME
from ner_v1.detectors.textual.name.hindi_const import (INDIC_BADWORDS, INDIC_QUESTIONWORDS,
INDIC_STOPWORDS, NAME_VARIATIONS, INDIC_UNICODE_RANGE,
COMMON_INDIC_WORDS_OCCURRING_WITH_NAME)
from ner_v1.detectors.textual.name.lang_constants import (INDIC_BADWORDS, INDIC_QUESTIONWORDS,
INDIC_STOPWORDS, NAME_VARIATIONS, INDIC_UNICODE_RANGE,
COMMON_INDIC_WORDS_OCCURRING_WITH_NAME)
from six.moves import range


# TODO: Refactor this module for readability and usability. Remove any hacks
# TODO: Make this module python 3 compatible

class NameDetector(object):
"""
Expand Down Expand Up @@ -62,21 +61,24 @@ def get_format_name(name_tokens, text):
2.The original text.
Args:
name_tokens (list): List of tokens in the name
Example:
['yash', 'doshi']
name_tokens (list): List of tokens in the name. e.g. ['yash', 'doshi']
Returns:
(
[{first_name: "yash", middle_name: None, last_name: "doshi"}],
["yash modi"]
)
(list, list): tuple containing
list: list of dictionaries, one for each detected name
list: list of str, the original text span for each detected name
Examples:
>>> NameDetector.get_format_name(['yash', 'p.', 'm.', 'doshi'], 'my name is yash p. m. doshi')
([{first_name: 'yash', middle_name: 'p. m.', last_name: 'doshi'}],
['yash p. m. doshi'])
"""
entity_value = []
original_text = []
if not name_tokens:
return entity_value, original_text

name_text = " ".join(name_tokens)

first_name = name_tokens[0]
middle_name = None
last_name = None
@@ -166,7 +168,7 @@ def detect_entity(self, text, bot_message=None, predetected_values=None, **kwarg
if self.language in EUROPEAN_LANGUAGES_SET | {ENGLISH_LANG}:
entity_value, original_text = self.detect_english_name()
elif self.language in INDIC_LANGUAGES_SET:
entity_value, original_text = self.detect_hindi_name()
entity_value, original_text = self.detect_indic_name()

for entity_value_dict in entity_value:
entity_value_dict.update({DATASTORE_VERIFIED: True, MODEL_VERIFIED: False})
@@ -201,7 +203,7 @@ def detect_english_name(self, text=None):
entity_value, original_text = self.get_name_using_pos_tagger(text)
return entity_value, original_text

def detect_hindi_name(self):
def detect_indic_name(self):
"""
This method is used to detect Hindi names from the provided text
@@ -216,15 +218,15 @@ def detect_hindi_name(self):
>> [{first_name: u"प्रतिक", middle_name: u"श्रीदत्त", last_name: u"जयराओ"}], [ u'प्रतिक श्रीदत्त जयराओ']
"""
if self.detect_abusive_phrases_hindi(text=self.text) or self.detect_question_hindi(text=self.text):
if self.detect_abusive_phrases_indic(text=self.text) or self.detect_question_indic(text=self.text):
return [], []

text = self.remove_emojis(text=self.text)
text_before_hindi_regex_operations = text
regex = re.compile(u'[^{unicode_range}\\s]+'.format(unicode_range=INDIC_UNICODE_RANGE[self.language]), re.U)
text = regex.sub(string=text, repl='')

entity_value, original_text = self.get_hindi_names_without_regex(text=text)
entity_value, original_text = self.get_indic_names_without_regex(text=text)
# Further check for name, if it might have been written in latin script.
if not entity_value:
english_present_regex = re.compile(u'[a-zA-Z]+', re.U)
@@ -364,6 +366,7 @@ def detect_person_name_entity(self, replaced_text):
def context_check_botmessage(self, botmessage):
"""
Checks if previous botmessage contains name as a keyword or not
Args:
botmessage: it consists of the previous botmessage
@@ -377,12 +380,12 @@ def context_check_botmessage(self, botmessage):
botmessage = regex_pattern.sub(r'', botmessage)

botmessage = " " + botmessage.lower().strip() + " "
for variant in NAME_VARIATIONS[self.language]:
for variant in NAME_VARIATIONS.get(self.language, []):
if " " + variant + " " in botmessage:
return True
return False

def get_hindi_names_without_regex(self, text):
def get_indic_names_without_regex(self, text):
"""
This method is used to detect hindi names without any regex pattern (this method is called only if
detection from regex patterns fails)
@@ -430,7 +433,7 @@ def replace_stopwords_hindi(self, text):

return ""

def detect_abusive_phrases_hindi(self, text):
def detect_abusive_phrases_indic(self, text):
"""
This method is used to check for hindi abuses in the sentence
Args:
@@ -457,7 +460,7 @@ def remove_emojis(self, text):
text = emoji_pattern.sub(repl='', string=text)
return text

def detect_question_hindi(self, text):
def detect_question_indic(self, text):
"""
This method is used to detect if the given text has a hindi question present in it
Args:
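
One of the quieter fixes in this file is the switch from NAME_VARIATIONS[self.language] to NAME_VARIATIONS.get(self.language, []). A small sketch with made-up data (not the real lang_constants mapping) of why that matters for a language with no configured variations:

NAME_VARIATIONS = {
    'en': ['name', 'call me'],
    'hi': [u'नाम'],
}

language = 'mr'  # no variations configured in this toy mapping

# Old pattern: a plain lookup raises KeyError and aborts the whole check.
try:
    variants = NAME_VARIATIONS[language]
except KeyError:
    variants = None

# New pattern: falls back to an empty list, so the loop simply does nothing
# and context_check_botmessage returns False for unsupported languages.
for variant in NAME_VARIATIONS.get(language, []):
    print(variant)  # never reached for 'mr'
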
