Skip to content

Commit

Permalink
Merge pull request #326 from hellohaptik/develop
Browse files Browse the repository at this point in the history
Develop to Master
  • Loading branch information
chiragjn authored Jan 31, 2020
2 parents f30e705 + 68c48f8 commit 752e22d
Show file tree
Hide file tree
Showing 24 changed files with 582 additions and 455 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -103,3 +103,5 @@ ENV/
sftp-config.json
.DS_Store
logs/

.vscode
58 changes: 20 additions & 38 deletions chatbot_ner/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,15 @@
from requests_aws4auth import AWS4Auth

BASE_DIR = os.path.dirname(os.path.dirname(__file__))
CONFIG_PATH = os.path.join(BASE_DIR, 'config')
MODEL_CONFIG_PATH = os.path.join(BASE_DIR, 'model_config')

LOG_PATH = os.path.join(BASE_DIR, 'logs')

# TODO: Set this up via Django LOGGING
# SET UP NER LOGGING
if not os.path.exists(LOG_PATH):
os.makedirs(LOG_PATH)

# LOGGING
# TODO - Make this much more generic & simpler in the future
LOG_LEVEL = os.environ.get('DJANGO_LOG_LEVEL', 'error').upper()

# Common formatter
Expand Down Expand Up @@ -48,25 +47,18 @@
nlp_logger.addHandler(handler)
nlp_logger.addHandler(handler_stdout)

if os.path.exists(CONFIG_PATH):
dotenv.read_dotenv(CONFIG_PATH)
else:
ner_logger.debug('Warning: no file named "config" found at %s. This is not a problem if your '
'datastore(elasticsearch) connection settings are already available in the environment',
CONFIG_PATH)

# TODO Consider prefixing every config variable with NER_ because these names live in the environment alongside
# TODO many others and may conflict by name. Example: the user is already using another instance of
# TODO Elasticsearch for other purposes
ENGINE = os.environ.get('ENGINE')
if ENGINE:
ENGINE = ENGINE.lower()
else:
ner_logger.warning("`ENGINE` variable is not set, Text type entities won't work without it")

# ES settings (Mandatory to use Text type entities)
ES_URL = os.environ.get('ES_URL')
ES_HOST = os.environ.get('ES_HOST')
ES_PORT = os.environ.get('ES_PORT')
ES_INDEX_NAME = os.environ.get('ES_INDEX_NAME')
ES_DOC_TYPE = os.environ.get('ES_DOC_TYPE')
ES_DOC_TYPE = os.environ.get('ES_DOC_TYPE', 'data_dictionary')
ES_AUTH_NAME = os.environ.get('ES_AUTH_NAME')
ES_AUTH_PASSWORD = os.environ.get('ES_AUTH_PASSWORD')
ES_BULK_MSG_SIZE = os.environ.get('ES_BULK_MSG_SIZE', '10000')
Expand All @@ -81,8 +73,8 @@
ES_BULK_MSG_SIZE = int(ES_BULK_MSG_SIZE)
ES_SEARCH_SIZE = int(ES_SEARCH_SIZE)
except ValueError:
ES_BULK_MSG_SIZE = 10000
ES_SEARCH_SIZE = 10000
ES_BULK_MSG_SIZE = 1000
ES_SEARCH_SIZE = 1000

# Optional Vars
ES_INDEX_1 = os.environ.get('ES_INDEX_1')
Expand All @@ -101,10 +93,7 @@
# Crf Model Specific with additional AWS storage (optional)
CRF_MODEL_S3_BUCKET_NAME = os.environ.get('CRF_MODEL_S3_BUCKET_NAME')
CRF_MODEL_S3_BUCKET_REGION = os.environ.get('CRF_MODEL_S3_BUCKET_REGION')

WORD_EMBEDDING_REMOTE_URL = os.environ.get('WORD_EMBEDDING_REMOTE_URL')


GOOGLE_TRANSLATE_API_KEY = os.environ.get('GOOGLE_TRANSLATE_API_KEY')

if not GOOGLE_TRANSLATE_API_KEY:
Expand All @@ -116,6 +105,7 @@
'elasticsearch': {
'connection_url': ES_URL, # Elastic Search URL
'name': ES_INDEX_NAME, # Index name used
'doc_type': ES_DOC_TYPE, # Index's doc type
'host': ES_HOST, # Elastic Search Host
'port': ES_PORT, # Port of elastic search
'user': ES_AUTH_NAME,
Expand All @@ -139,31 +129,23 @@
}
}

if ES_DOC_TYPE:
CHATBOT_NER_DATASTORE['elasticsearch']['doc_type'] = ES_DOC_TYPE
else:
CHATBOT_NER_DATASTORE['elasticsearch']['doc_type'] = 'data_dictionary'

ES_AWS_SECRET_ACCESS_KEY = os.environ.get('ES_AWS_SECRET_ACCESS_KEY')
ES_AWS_ACCESS_KEY_ID = os.environ.get('ES_AWS_ACCESS_KEY_ID')
ES_AWS_REGION = os.environ.get('ES_AWS_REGION')
ES_AWS_SERVICE = os.environ.get('ES_AWS_SERVICE')
ES_AWS_REGION = os.environ.get('ES_AWS_REGION')
ES_AWS_ACCESS_KEY_ID = os.environ.get('ES_AWS_ACCESS_KEY_ID')
ES_AWS_SECRET_ACCESS_KEY = os.environ.get('ES_AWS_SECRET_ACCESS_KEY')

if not ES_AWS_SERVICE:
ES_AWS_SERVICE = 'es'

if ES_AWS_ACCESS_KEY_ID and ES_AWS_SECRET_ACCESS_KEY and ES_AWS_REGION and ES_AWS_SERVICE:
CHATBOT_NER_DATASTORE['elasticsearch']['http_auth'] = AWS4Auth(ES_AWS_ACCESS_KEY_ID, ES_AWS_SECRET_ACCESS_KEY,
ES_AWS_REGION, ES_AWS_SERVICE)
CHATBOT_NER_DATASTORE['elasticsearch']['use_ssl'] = True
CHATBOT_NER_DATASTORE['elasticsearch']['verify_certs'] = True
CHATBOT_NER_DATASTORE['elasticsearch']['connection_class'] = RequestsHttpConnection
elif ES_AWS_REGION and ES_AWS_SERVICE:
if ES_AWS_SERVICE and ES_AWS_REGION:
ner_logger.info('`ES_AWS_SERVICE` and `ES_AWS_REGION` are set. Using AWS Elasticsearch settings ')
CHATBOT_NER_DATASTORE['elasticsearch']['use_ssl'] = True
CHATBOT_NER_DATASTORE['elasticsearch']['verify_certs'] = True
CHATBOT_NER_DATASTORE['elasticsearch']['connection_class'] = RequestsHttpConnection
if ES_AWS_ACCESS_KEY_ID and ES_AWS_SECRET_ACCESS_KEY:
CHATBOT_NER_DATASTORE['elasticsearch']['http_auth'] = AWS4Auth(ES_AWS_ACCESS_KEY_ID,
ES_AWS_SECRET_ACCESS_KEY,
ES_AWS_REGION, ES_AWS_SERVICE)
else:
ner_logger.warning('Elasticsearch: Some or all AWS settings missing from environment, this will skip AWS auth!')
ner_logger.warning('`ES_AWS_SERVICE` and `ES_AWS_REGION` are not set. '
'This is not a problem if you are using self hosted ES')

# Model Vars
if os.path.exists(MODEL_CONFIG_PATH):
Expand Down
4 changes: 2 additions & 2 deletions chatbot_ner/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,10 @@
# See https://docs.djangoproject.com/en/1.11/howto/deployment/checklist/

# SECURITY WARNING: keep the secret key used in production secret!
SECRET_KEY = '70vigjv=h)=p8d%e80(3ue2p70e^x96#n8u+7ia9+7o02iq*6k'
SECRET_KEY = os.environ.get('SECRET_KEY')

# SECURITY WARNING: don't run with debug turned on in production!
_dj_debug = os.environ.get('DJANGO_DEBUG')
_dj_debug = os.environ.get('DJANGO_DEBUG', 'false')
DEBUG = (_dj_debug and _dj_debug.lower() == 'true')

TEMPLATE_DEBUG = False
Expand Down
34 changes: 13 additions & 21 deletions config.example
Original file line number Diff line number Diff line change
@@ -1,16 +1,13 @@
# This is the config.example file for the chatbot_ner module; like a .env.example file, it holds settings
# Copy it to a file named config and fill in all the values.
# Never push your personal keys and passwords to any public repository!
# Make sure the variables in this file are in the environment. Example:
# $ source chatbot_ner/config
# Please don't add spaces around '='

# This is the primary engine to use. Valid values are one of the following:
# elasticsearch
# This is the primary engine to use. Valid values are one of the following: ['elasticsearch']

ENGINE=elasticsearch

# ES prefixed values correspond to settings for elasticsearch.
# ES prefixed variables correspond to settings for elasticsearch.
# ES_URL is the complete url with auth name and password required to connect. If provided, this will override ES_HOST,
# ES_PORT, ES_AUTH_NAME, ES_AUTH_PASSWORD
# ES_HOST by default is the host for the ES instance that comes up with compose
Expand All @@ -22,6 +19,15 @@ ES_URL=
ES_PORT=9200
ES_INDEX_NAME=entity_data
ES_DOC_TYPE=data_dictionary
# ES_BULK_MSG_SIZE is an integer value
ES_BULK_MSG_SIZE=1000
# ES_SEARCH_SIZE is an integer value
ES_SEARCH_SIZE=10000
# Provide the following values if you need AWS authentication
ES_AWS_SERVICE=
ES_AWS_REGION=
ES_AWS_ACCESS_KEY_ID=
ES_AWS_SECRET_ACCESS_KEY=

NAME=chatbot_ner
DJANGODIR=/app
Expand All @@ -31,25 +37,11 @@ DJANGO_SETTINGS_MODULE=chatbot_ner.settings
DJANGO_WSGI_MODULE=chatbot_ner/wsgi.py
DJANGO_LOG_LEVEL=debug
DJANGO_DEBUG=False
# Important: Change the value of SECRET_KEY to something else and keep it secret
SECRET_KEY=!yqqcz-v@(s@kpygpvomcuu3il0q1&qtpz)e_g0ulo-sdv%c0c
PORT=8081
TIMEOUT=600

CITY_MODEL_TYPE=crf
CITY_MODEL_PATH=
GOOGLE_TRANSLATE_API_KEY=


# ES_BULK_MSG_SIZE is an integer value
ES_BULK_MSG_SIZE=1000

# ES_SEARCH_SIZE is an integer value
ES_SEARCH_SIZE=10000

# Provide the following values if you need AWS authentication
ES_AWS_SECRET_ACCESS_KEY=
ES_AWS_ACCESS_KEY_ID=
ES_AWS_REGION=
ES_AWS_SERVICE=

# In order to enable entity detection for multiple languages, we use Google Translate. Please enter the key (optional)
GOOGLE_TRANSLATE_API_KEY=
5 changes: 2 additions & 3 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,8 @@ ENV PORT=8081
ENV TIMEOUT=600
ENV DEBIAN_FRONTEND=noninteractive


#ENV DATE_MODEL_TYPE=crf
#ENV DATE_MODEL_PATH=/root/models/models_live/date/crf/model.crf
# Important: change this via .env (the file copied from config.example)
ENV SECRET_KEY=!yqqcz-v@(s@kpygpvomcuu3il0q1&qtpz)e_g0ulo-sdv%c0c

EXPOSE 8081

Expand Down
4 changes: 3 additions & 1 deletion docs/install.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ Following are the steps to create the Docker image and run NER with Docker.

Docker Compose
```shell
sudo curl -L https://github.com/docker/compose/releases/download/1.22.0/docker-compose-$(uname -s)-$(uname -m) -o /usr/local/bin/docker-compose
sudo curl -L "https://github.com/docker/compose/releases/download/1.22.0/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/bin/docker-compose
sudo chmod +x /usr/local/bin/docker-compose
```
- MacOS:
Expand All @@ -56,6 +56,8 @@ cd docker
docker-compose up --build -d
```

Open `docker/.env` file and edit the environment variables if needed. (You should change the SECRET_KEY).

The above will also mount local repo root directory inside the containers /app directory.
Please wait 5 seconds before running the first curl or making an API call to chatbot_ner.
> **NOTE**: make sure that nothing is running on port 8081 on your server or your local environment.
Expand Down
5 changes: 4 additions & 1 deletion ner_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,5 +57,8 @@
PARAMETER_MAX_DIGITS = 'max_number_digits'
PARAMETER_NUMBER_UNIT_TYPE = 'unit_type'

# Prior detection results from CRF models.
PARAMETER_PRIOR_RESULTS = "predetected_values"

# Locale for Date and Phone Number detection
PARAMETER_LOCALE = 'locale'
PARAMETER_LOCALE = 'locale'
19 changes: 12 additions & 7 deletions ner_v1/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@
from language_utilities.constant import ENGLISH_LANG
from ner_constants import (PARAMETER_MESSAGE, PARAMETER_ENTITY_NAME, PARAMETER_STRUCTURED_VALUE,
PARAMETER_FALLBACK_VALUE, PARAMETER_BOT_MESSAGE, PARAMETER_TIMEZONE, PARAMETER_REGEX,
PARAMETER_LANGUAGE_SCRIPT,
PARAMETER_SOURCE_LANGUAGE)
PARAMETER_LANGUAGE_SCRIPT, PARAMETER_SOURCE_LANGUAGE, PARAMETER_PRIOR_RESULTS)

from ner_v1.chatbot.combine_detection_logic import combine_output_of_detection_logic_and_tag
from ner_v1.chatbot.entity_detection import (get_location, get_phone_number, get_email, get_city, get_pnr,
get_number, get_passenger_count, get_shopping_size, get_time,
Expand Down Expand Up @@ -69,9 +69,10 @@ def get_parameters_dictionary(request):
PARAMETER_MAX_DIGITS: request.GET.get('max_number_digits'),
PARAMETER_READ_EMBEDDINGS_FROM_REMOTE_URL: to_bool(request.GET.get('read_embeddings_from_remote_url')),
PARAMETER_READ_MODEL_FROM_S3: to_bool(request.GET.get('read_model_from_s3')),
PARAMETER_LIVE_CRF_MODEL_PATH: request.GET.get('live_crf_model_path')
PARAMETER_LIVE_CRF_MODEL_PATH: request.GET.get('live_crf_model_path'),
PARAMETER_PRIOR_RESULTS: json.loads(request.GET.get("predetected_values", '[]'))
}

ner_logger.info("parameters dict - {}".format(parameters_dict))
return parameters_dict


Expand Down Expand Up @@ -103,7 +104,8 @@ def parse_post_request(request):
PARAMETER_MAX_DIGITS: request_data.get('max_number_digits'),
PARAMETER_READ_EMBEDDINGS_FROM_REMOTE_URL: to_bool(request_data.get('read_embeddings_from_remote_url')),
PARAMETER_READ_MODEL_FROM_S3: to_bool(request_data.get('read_model_from_s3')),
PARAMETER_LIVE_CRF_MODEL_PATH: request_data.get('live_crf_model_path')
PARAMETER_LIVE_CRF_MODEL_PATH: request_data.get('live_crf_model_path'),
PARAMETER_PRIOR_RESULTS: request_data.get("predetected_values", [])
}

return parameters_dict
Expand Down Expand Up @@ -247,6 +249,7 @@ def text(request):
live_crf_model_path=parameters_dict[PARAMETER_LIVE_CRF_MODEL_PATH],
read_model_from_s3=parameters_dict[PARAMETER_READ_MODEL_FROM_S3],
read_embeddings_from_remote_url=parameters_dict[PARAMETER_READ_EMBEDDINGS_FROM_REMOTE_URL],
predetected_values=parameters_dict[PARAMETER_PRIOR_RESULTS]
)
ner_logger.debug('Finished %s : %s ' % (parameters_dict[PARAMETER_ENTITY_NAME], entity_output))
except TypeError as e:
Expand All @@ -268,7 +271,8 @@ def location(request):
entity_output = get_location(parameters_dict[PARAMETER_MESSAGE], parameters_dict[PARAMETER_ENTITY_NAME],
parameters_dict[PARAMETER_STRUCTURED_VALUE],
parameters_dict[PARAMETER_FALLBACK_VALUE],
parameters_dict[PARAMETER_BOT_MESSAGE])
parameters_dict[PARAMETER_BOT_MESSAGE],
predetected_values=parameters_dict[PARAMETER_PRIOR_RESULTS])
ner_logger.debug('Finished %s : %s ' % (parameters_dict[PARAMETER_ENTITY_NAME], entity_output))
except TypeError as e:
ner_logger.exception('Exception for location: %s ' % e)
Expand Down Expand Up @@ -361,7 +365,8 @@ def person_name(request):
structured_value=parameters_dict[PARAMETER_STRUCTURED_VALUE],
fallback_value=parameters_dict[PARAMETER_FALLBACK_VALUE],
bot_message=parameters_dict[PARAMETER_BOT_MESSAGE],
language=parameters_dict[PARAMETER_SOURCE_LANGUAGE])
language=parameters_dict[PARAMETER_SOURCE_LANGUAGE],
predetected_values=parameters_dict[PARAMETER_PRIOR_RESULTS])
ner_logger.debug('Finished %s : %s ' % (parameters_dict[PARAMETER_ENTITY_NAME], entity_output))
except TypeError as e:
ner_logger.exception('Exception for person_name: %s ' % e)
Expand Down
Loading

0 comments on commit 752e22d

Please sign in to comment.