diff --git a/.env.example b/.env.example new file mode 100644 index 000000000..a415407d7 --- /dev/null +++ b/.env.example @@ -0,0 +1,62 @@ +# This is .env.example file for chatbot_ner module to hold settings +# Copy it to a docker/.env and fill in all the values. +# Never push your personal keys and passwords to any public repository! +# Please don't add spaces around '=' + +NAME=chatbot_ner +DJANGODIR=/app +ENVIRONMENT=development +DJANGO_LOG_LEVEL=DEBUG +DJANGO_SETTINGS_MODULE=chatbot_ner.settings +DJANGO_WSGI_MODULE=chatbot_ner/wsgi.py +# Important: Change the value of SECRET_KEY to something else and keep it secret +SECRET_KEY=!yqqcz-v@(s@kpygpvomcuu3il0q1&qtpz)e_g0ulo-sdv%c0c + +NUM_WORKERS=1 +MAX_REQUESTS=1000 +PORT=8081 +TIMEOUT=600 + +# This is the primary engine to use for datastore. Valid values are one of the following: ['elasticsearch'] +ENGINE=elasticsearch + +# ES prefixed variables correspond to settings for elasticsearch. +# ES_URL is the complete url with auth name and password required to connect. If provided, this will override ES_HOST, +# ES_PORT, ES_AUTH_NAME, ES_AUTH_PASSWORD +# ES_HOST and ES_PORT by default is host for ES that comes up with compose + +ES_URL= +ES_AUTH_NAME= +ES_AUTH_PASSWORD= +ES_SCHEME=http +ES_HOST=elasticsearch +ES_PORT=9200 +ES_ALIAS=entity_data +ES_INDEX_1=entity_data_v1 +ES_INDEX_2= +ES_DOC_TYPE=data_dictionary +ELASTICSEARCH_CRF_DATA_INDEX_NAME=entity_examples_data +ELASTICSEARCH_CRF_DATA_DOC_TYPE=training_dictionary + +ES_REQUEST_TIMEOUT=20 +ES_BULK_MSG_SIZE=1000 +ES_SEARCH_SIZE=10000 + +# Auth variables if ES is hosted on AWS +ES_AWS_ACCESS_KEY_ID= +ES_AWS_REGION= +ES_AWS_SECRET_ACCESS_KEY= +ES_AWS_SERVICE= + +DESTINATION_ES_SCHEME= +DESTINATION_HOST= +DESTINATION_PORT= + +# In order to enable entity detection for multiple languages, we use google translate. 
Please enter the key(optional) +GOOGLE_TRANSLATE_API_KEY= + +# Application performance and error alerting +ELASTIC_APM_ENABLED=False +ELASTIC_APM_SERVER_URL= +SENTRY_ENABLED=False +SENTRY_DSN= diff --git a/.gitignore b/.gitignore index 5409238f3..20606789e 100644 --- a/.gitignore +++ b/.gitignore @@ -3,9 +3,6 @@ __pycache__/ *.py[cod] *$py.class -# haptik ner config file -config - # C Extensions *.so @@ -79,6 +76,7 @@ celerybeat-schedule # dotenv .env +!.env.example update_env.sh # virtualenv @@ -94,16 +92,18 @@ ENV/ .idea/ .realsync -/Dockerfile* -/post-merge -/entrypoint.sh -/setup.py -/docker-compose.yml -/newrelic.ini -sftp-config.json +Dockerfile* +post-merge +entrypoint.sh +setup.py +docker-compose*.yml +.dockerignore +newrelic.ini +dbdata/ .DS_Store logs/*.log* .vscode newman_reports/ +sftp-config.json dev.json diff --git a/chatbot_ner/asgi.py b/chatbot_ner/asgi.py new file mode 100644 index 000000000..b4e3aa340 --- /dev/null +++ b/chatbot_ner/asgi.py @@ -0,0 +1,17 @@ +# pylint: disable=W1618 +""" +ASGI config for chatbot_ner project. + +It exposes the ASGI callable as a module-level variable named ``application``. + +For more information on this file, see +https://docs.djangoproject.com/en/3.2/howto/deployment/asgi/ +""" + +import os + +from django.core.asgi import get_asgi_application + +os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'chatbot_ner.settings') + +application = get_asgi_application() diff --git a/chatbot_ner/settings.py b/chatbot_ner/settings.py index b657223fe..37c0864e1 100755 --- a/chatbot_ner/settings.py +++ b/chatbot_ner/settings.py @@ -1,11 +1,11 @@ """ -Django settings for predictive_server project. +Django settings for chatbot_ner project. 
For more information on this file, see -https://docs.djangoproject.com/en/1.11/topics/settings/ +https://docs.djangoproject.com/en/3.2/topics/settings/ For the full list of settings and their values, see -https://docs.djangoproject.com/en/1.11/ref/settings/ +https://docs.djangoproject.com/en/3.2/ref/settings/ """ # Build paths inside the project like this: os.path.join(BASE_DIR, ...) @@ -20,24 +20,17 @@ ENVIRONMENT = os.environ.get('ENVIRONMENT') or os.environ.get('HAPTIK_ENV') # Quick-start development settings - unsuitable for production -# See https://docs.djangoproject.com/en/1.11/howto/deployment/checklist/ +# See https://docs.djangoproject.com/en/3.2/howto/deployment/checklist/ # SECURITY WARNING: keep the secret key used in production secret! SECRET_KEY = os.environ.get('SECRET_KEY') # SECURITY WARNING: don't run with debug turned on in production! DEBUG = False - TEMPLATE_DEBUG = False - ALLOWED_HOSTS = ['*'] -# setup sentry - -setup_sentry() - # Application definition - INSTALLED_APPS = [ 'django.contrib.admin', 'django.contrib.auth', @@ -48,10 +41,12 @@ 'datastore', 'ner_v1', 'ner_v2', + # TODO: drop dependency on `nose`, no longer actively maintained 'django_nose' ] MIDDLEWARE = [ + 'django.middleware.security.SecurityMiddleware', 'django.contrib.sessions.middleware.SessionMiddleware', 'django.middleware.common.CommonMiddleware', 'django.middleware.csrf.CsrfViewMiddleware', @@ -60,11 +55,81 @@ 'django.middleware.clickjacking.XFrameOptionsMiddleware', ] +ROOT_URLCONF = 'chatbot_ner.urls' + +TEMPLATES = [ + { + 'BACKEND': 'django.template.backends.django.DjangoTemplates', + 'DIRS': [], + 'APP_DIRS': True, + 'OPTIONS': { + 'context_processors': [ + 'django.template.context_processors.debug', + 'django.template.context_processors.request', + 'django.contrib.auth.context_processors.auth', + 'django.contrib.messages.context_processors.messages', + ], + }, + }, +] + +WSGI_APPLICATION = 'chatbot_ner.wsgi.application' + +# Internationalization +# 
https://docs.djangoproject.com/en/3.2/topics/i18n/ + +LANGUAGE_CODE = 'en-us' +TIME_ZONE = 'UTC' +USE_I18N = True +USE_L10N = True +USE_TZ = True + +# Static files (CSS, JavaScript, Images) +# https://docs.djangoproject.com/en/3.2/howto/static-files/ + +STATIC_URL = '/static/' + +# Default primary key field type +# https://docs.djangoproject.com/en/3.2/ref/settings/#default-auto-field + +DEFAULT_AUTO_FIELD = 'django.db.models.BigAutoField' + +# Database +# https://docs.djangoproject.com/en/3.2/ref/settings/#databases +DATABASES = { + 'default': { + 'ENGINE': 'django.db.backends.sqlite3', + 'NAME': os.path.join(BASE_DIR, 'db.sqlite3'), + } +} + +# Password validation +# https://docs.djangoproject.com/en/3.2/ref/settings/#auth-password-validators + +AUTH_PASSWORD_VALIDATORS = [ + { + 'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator', + }, + { + 'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator', + }, + { + 'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator', + }, + { + 'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator', + }, +] + +# setup sentry +setup_sentry() + # APM _elastic_apm_enabled = (os.environ.get('ELASTIC_APM_ENABLED') or '').strip().lower() ELASTIC_APM_ENABLED = (_elastic_apm_enabled == 'true') and 'test' not in sys.argv -ELASTIC_APM_SERVER_URL = os.environ.get('ELASTIC_APM_SERVER_URL') + if ELASTIC_APM_ENABLED: + ELASTIC_APM_SERVER_URL = os.environ.get('ELASTIC_APM_SERVER_URL') ELASTIC_APM = { 'DEBUG': DEBUG, 'SERVICE_NAME': 'chatbot_ner', @@ -72,88 +137,50 @@ 'SPAN_FRAMES_MIN_DURATION': '5ms', 'STACK_TRACE_LIMIT': 500, 'ENVIRONMENT': ENVIRONMENT, - 'TRANSACTION_SAMPLE_RATE': '0.1', + 'TRANSACTION_SAMPLE_RATE': 0.1, 'TRANSACTION_MAX_SPANS': 500, - 'INSTRUMENT': 'True', - 'DISABLE_SEND': 'False', + 'INSTRUMENT': True, + 'DISABLE_SEND': False, 'CAPTURE_BODY': 'off', 'SERVER_TIMEOUT': '2s', + 'API_REQUEST_TIME': '10s', + 'DJANGO_AUTOINSERT_MIDDLEWARE': 
False, + 'DISABLE_LOG_RECORD_FACTORY': True, } INSTALLED_APPS.append('elasticapm.contrib.django') MIDDLEWARE.append('elasticapm.contrib.django.middleware.TracingMiddleware') -ROOT_URLCONF = 'chatbot_ner.urls' - -WSGI_APPLICATION = 'chatbot_ner.wsgi.application' - - -# Database -# https://docs.djangoproject.com/en/1.11/ref/settings/#databases - -# FOR TEST CASES - COMMON SETTINGS FOR ALL ENVIRONMENTS - - -class DisableMigrations(object): - - def __contains__(self, item): - return True - - def __getitem__(self, item): - return None - - -DATABASES = { - 'default': { - 'ENGINE': 'django.db.backends.sqlite3', - 'NAME': os.path.join(BASE_DIR, 'db.sqlite3'), - } -} - -# Keeping this block here for ease in the future -TEST_DB_PATH = os.environ.get('TEST_DB_PATH') or '/dev/shm/chatbot_ner_test.db.sqlite3' +TEST_RUNNER = 'django_nose.NoseTestSuiteRunner' if 'test' in sys.argv: + # FOR TEST CASES - COMMON SETTINGS FOR ALL ENVIRONMENTS + TEST_DB_PATH = os.environ.get('TEST_DB_PATH') or '/dev/shm/chatbot_ner_test.db.sqlite3' DATABASES['default'] = { 'ENGINE': 'django.db.backends.sqlite3', 'NAME': TEST_DB_PATH, 'CONN_MAX_AGE': 60 } - -TEST_RUNNER = 'django_nose.NoseTestSuiteRunner' -NOSE_ARGS = [ - '--nocapture', - '--nologcapture', - '--verbosity=3', - '--exclude-dir=chatbot_ner/', - '--exclude-dir=docs/', - '--exclude-dir=docker/', - '--exclude-dir=data/', - '--ignore-files=manage.py', - '--ignore-files=nltk_setup.py', - '--ignore-files=__init__.py', - '--ignore-files=const.py', - '--ignore-files=constant.py', - '--ignore-files=constants.py', - '--ignore-files=run_postman_tests.py', - '--cover-erase', - '--cover-package=datastore,external_api,language_utilities,lib,ner_v1,ner_v2', - '--cover-inclusive', -] - -# Internationalization -# https://docs.djangoproject.com/en/1.11/topics/i18n/ - -LANGUAGE_CODE = 'en-us' - -TIME_ZONE = 'UTC' - -USE_I18N = True - -USE_L10N = True - -USE_TZ = True - -# Static files (CSS, JavaScript, Images) -# 
https://docs.djangoproject.com/en/1.11/howto/static-files/ - -STATIC_URL = '/static/' + MIGRATION_MODULES = { + 'datastore': None, + 'ner_v1': None, + 'ner_v2': None, + } + NOSE_ARGS = [ + '--nocapture', + '--nologcapture', + '--verbosity=3', + '--exclude-dir=chatbot_ner/', + '--exclude-dir=docs/', + '--exclude-dir=docker/', + '--exclude-dir=data/', + '--ignore-files=manage.py', + '--ignore-files=nltk_setup.py', + '--ignore-files=__init__.py', + '--ignore-files=const.py', + '--ignore-files=constant.py', + '--ignore-files=constants.py', + '--ignore-files=run_postman_tests.py', + '--cover-erase', + '--cover-package=datastore,external_api,language_utilities,lib,ner_v1,ner_v2', + '--cover-inclusive', + ] diff --git a/chatbot_ner/urls.py b/chatbot_ner/urls.py index b3c5d7b28..64f4918d4 100755 --- a/chatbot_ner/urls.py +++ b/chatbot_ner/urls.py @@ -1,63 +1,61 @@ from __future__ import absolute_import -from django.conf.urls import url +from django.urls import re_path +from external_api import api as external_api from ner_v1 import api as api_v1 from ner_v2 import api as api_v2 -from external_api import api as external_api - - urlpatterns = [ - url(r'^v1/text_bulk/$', api_v1.text), - url(r'^v1/text/$', api_v1.text), - url(r'^v1/location/$', api_v1.location), - url(r'^v1/phone_number/$', api_v1.phone_number), - url(r'^v1/email/$', api_v1.email), - url(r'^v1/city/$', api_v1.city), - url(r'^v1/pnr/$', api_v1.pnr), - url(r'^v1/shopping_size/$', api_v1.shopping_size), - url(r'^v1/passenger_count/$', api_v1.passenger_count), - url(r'^v1/number/$', api_v1.number), - url(r'^v1/time/$', api_v1.time), - url(r'^v1/time_with_range/$', api_v1.time_with_range), - url(r'^v1/date/$', api_v1.date), - url(r'^v1/budget/$', api_v1.budget), - url(r'^v1/ner/$', api_v1.ner), - url(r'^v1/combine_output/$', api_v1.combine_output), - url(r'^v1/person_name/$', api_v1.person_name), - url(r'^v1/regex/$', api_v1.regex), + re_path(r'^v1/text_bulk/$', api_v1.text), + re_path(r'^v1/text/$', api_v1.text), 
+ re_path(r'^v1/location/$', api_v1.location), + re_path(r'^v1/phone_number/$', api_v1.phone_number), + re_path(r'^v1/email/$', api_v1.email), + re_path(r'^v1/city/$', api_v1.city), + re_path(r'^v1/pnr/$', api_v1.pnr), + re_path(r'^v1/shopping_size/$', api_v1.shopping_size), + re_path(r'^v1/passenger_count/$', api_v1.passenger_count), + re_path(r'^v1/number/$', api_v1.number), + re_path(r'^v1/time/$', api_v1.time), + re_path(r'^v1/time_with_range/$', api_v1.time_with_range), + re_path(r'^v1/date/$', api_v1.date), + re_path(r'^v1/budget/$', api_v1.budget), + re_path(r'^v1/ner/$', api_v1.ner), + re_path(r'^v1/combine_output/$', api_v1.combine_output), + re_path(r'^v1/person_name/$', api_v1.person_name), + re_path(r'^v1/regex/$', api_v1.regex), # V2 detectors - url(r'^v2/date/$', api_v2.date), - url(r'^v2/time/$', api_v2.time), - url(r'^v2/number/$', api_v2.number), - url(r'^v2/phone_number/$', api_v2.phone_number), - url(r'^v2/number_range/$', api_v2.number_range), - url(r'^v2/text/$', api_v2.text), + re_path(r'^v2/date/$', api_v2.date), + re_path(r'^v2/time/$', api_v2.time), + re_path(r'^v2/number/$', api_v2.number), + re_path(r'^v2/phone_number/$', api_v2.phone_number), + re_path(r'^v2/number_range/$', api_v2.number_range), + re_path(r'^v2/text/$', api_v2.text), # V2 bulk detectors - url(r'^v2/date_bulk/$', api_v2.date), - url(r'^v2/time_bulk/$', api_v2.time), - url(r'^v2/number_bulk/$', api_v2.number), - url(r'^v2/number_range_bulk/$', api_v2.number_range), - url(r'^v2/phone_number_bulk/$', api_v2.phone_number), + re_path(r'^v2/date_bulk/$', api_v2.date), + re_path(r'^v2/time_bulk/$', api_v2.time), + re_path(r'^v2/number_bulk/$', api_v2.number), + re_path(r'^v2/number_range_bulk/$', api_v2.number_range), + re_path(r'^v2/phone_number_bulk/$', api_v2.phone_number), # Deprecated dictionary read write, use entities/data/v1/* - url(r'^entities/get_entity_word_variants', external_api.get_entity_word_variants), - url(r'^entities/update_dictionary', 
external_api.update_dictionary), + re_path(r'^entities/get_entity_word_variants', external_api.get_entity_word_variants), + re_path(r'^entities/update_dictionary', external_api.update_dictionary), # Transfer Dictioanry - url(r'^entities/transfer_entities', external_api.transfer_entities), + re_path(r'^entities/transfer_entities', external_api.transfer_entities), # Training Data Read Write - url(r'^entities/get_crf_training_data', external_api.get_crf_training_data), - url(r'^entities/update_crf_training_data', external_api.update_crf_training_data), + re_path(r'^entities/get_crf_training_data', external_api.get_crf_training_data), + re_path(r'^entities/update_crf_training_data', external_api.update_crf_training_data), - url(r'^entities/languages/v1/(?P.+)$', external_api.entity_language_view), - url(r'^entities/data/v1/(?P.+)$', external_api.entity_data_view), + re_path(r'^entities/languages/v1/(?P.+)$', external_api.entity_language_view), + re_path(r'^entities/data/v1/(?P.+)$', external_api.entity_data_view), # Read unique values for text entity - url(r'^entities/values/v1/(?P.+)$', external_api.read_unique_values_for_text_entity), + re_path(r'^entities/values/v1/(?P.+)$', external_api.read_unique_values_for_text_entity), ] diff --git a/chatbot_ner/wsgi.py b/chatbot_ner/wsgi.py index 413a2b682..837d79d67 100755 --- a/chatbot_ner/wsgi.py +++ b/chatbot_ner/wsgi.py @@ -1,14 +1,16 @@ """ -WSGI config for predictive_server project. +WSGI config for chatbot_ner project. It exposes the WSGI callable as a module-level variable named ``application``. 
For more information on this file, see -https://docs.djangoproject.com/en/1.11/howto/deployment/wsgi/ +https://docs.djangoproject.com/en/3.2/howto/deployment/wsgi/ """ -from __future__ import absolute_import + import os -os.environ.setdefault("DJANGO_SETTINGS_MODULE", "chatbot_ner.settings") from django.core.wsgi import get_wsgi_application + +os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'chatbot_ner.settings') + application = get_wsgi_application() diff --git a/config.example b/config.example deleted file mode 100644 index 72b63d81f..000000000 --- a/config.example +++ /dev/null @@ -1,55 +0,0 @@ -# This is config.example file for chatbot_ner module similar to .env.example file to hold settings -# Copy it to a docker/.env and fill in all the values. -# Never push your personal keys and passwords to any public repository! -# Please don't add spaces around '=' - -NAME=chatbot_ner -DJANGODIR=/app -DJANGO_LOG_LEVEL=DEBUG -DJANGO_SETTINGS_MODULE=chatbot_ner.settings -DJANGO_WSGI_MODULE=chatbot_ner/wsgi.py -# Important: Change the value of SECRET_KEY to something else and keep it secret -SECRET_KEY=!yqqcz-v@(s@kpygpvomcuu3il0q1&qtpz)e_g0ulo-sdv%c0c - -NUM_WORKERS=1 -MAX_REQUESTS=1000 -PORT=8081 -TIMEOUT=600 - -# This is the primary engine to use for datastore. Valid values are one of the following: ['elasticsearch'] -ENGINE=elasticsearch - -# ES prefixed variables correspond to settings for elasticsearch. -# ES_URL is the complete url with auth name and password required to connect. 
If provided, this will override ES_HOST, -# ES_PORT, ES_AUTH_NAME, ES_AUTH_PASSWORD -# ES_HOST and ES_PORT by default is host for ES that comes up with compose - -ES_URL= -ES_AUTH_NAME= -ES_AUTH_PASSWORD= -ES_SCHEME=http -ES_HOST=elasticsearch -ES_PORT=9200 -ES_ALIAS=entity_data -ES_INDEX_1=entity_data_v1 -ES_INDEX_2= -ES_DOC_TYPE=data_dictionary -ELASTICSEARCH_CRF_DATA_INDEX_NAME=entity_examples_data -ELASTICSEARCH_CRF_DATA_DOC_TYPE=training_dictionary - -ES_BULK_MSG_SIZE=1000 -ES_SEARCH_SIZE=10000 - -# Auth variables if ES is hosted on AWS -ES_AWS_ACCESS_KEY_ID= -ES_AWS_REGION= -ES_AWS_SECRET_ACCESS_KEY= -ES_AWS_SERVICE= - -DESTINATION_ES_SCHEME= -DESTINATION_HOST= -DESTINATION_PORT= - -# In order to enable entity detection for multiple languages, we use google translate. Please enter the key(optional) -GOOGLE_TRANSLATE_API_KEY= - diff --git a/config.example b/config.example new file mode 120000 index 000000000..534f3d9c4 --- /dev/null +++ b/config.example @@ -0,0 +1 @@ +.env.example \ No newline at end of file diff --git a/docker/Dockerfile b/docker/Dockerfile new file mode 100644 index 000000000..6f43808fa --- /dev/null +++ b/docker/Dockerfile @@ -0,0 +1,40 @@ +FROM python:3.6.15 +ENV DEBIAN_FRONTEND=noninteractive +RUN apt-get update && \ + apt-get install -y wget build-essential curl nginx supervisor + +WORKDIR /app + +COPY requirements.txt nltk_setup.py /app/ + +RUN touch /app/config && \ + pip install --no-cache-dir -U pip && \ + pip install --no-cache-dir -r /app/requirements.txt && \ + pip check && \ + python /app/nltk_setup.py + +COPY docker/supervisord.conf /etc/supervisor/conf.d/supervisord.conf +COPY docker/nginx.conf /etc/nginx/nginx.conf +COPY docker/default.site.conf /etc/nginx/sites-available/default + +# TODO: Separate this out to a dev/test docker image +RUN curl -sL https://deb.nodesource.com/setup_12.x | bash && \ + apt-get install nodejs && \ + npm install -g newman && \ + npm install -g newman-reporter-htmlextra && \ + rm -rf /tmp/* 
~/.cache/pip /var/lib/apt/lists/* + + +ENV NAME="chatbot_ner" +ENV DJANGODIR=/app +ENV NUM_WORKERS=4 +ENV DJANGO_SETTINGS_MODULE=chatbot_ner.settings +ENV PORT=8081 +ENV TIMEOUT=600 +# Important change this via .env (the file copied from .env.example) +ENV SECRET_KEY=!yqqcz-v@(s@kpygpvomcuu3il0q1&qtpz)e_g0ulo-sdv%c0c + +ADD . /app +EXPOSE 8081 +# entrypoint/cmd script +CMD /app/docker/cmd.sh diff --git a/docker/Dockerfile-python3 b/docker/Dockerfile-python3 deleted file mode 100644 index e19283a39..000000000 --- a/docker/Dockerfile-python3 +++ /dev/null @@ -1,42 +0,0 @@ -FROM python:3.6.10 - -RUN apt-get update && apt-get install -y wget build-essential curl nginx supervisor - -RUN curl -sL https://deb.nodesource.com/setup_12.x | bash && \ - apt-get install nodejs && \ - npm install -g newman && \ - npm install -g newman-reporter-htmlextra - -WORKDIR /app - -COPY docker/install.sh nltk_setup.py datastore_setup.py /app/ -COPY docker/supervisord.conf /etc/supervisor/conf.d/supervisord.conf - -# cython is installed because pandas build fails otherwise -RUN /app/install.sh && \ - touch /app/config && \ - pip install -U pip && \ - pip install --no-cache-dir -I uwsgi && \ - pip install cython - -COPY requirements.txt /app/requirements.txt - -RUN pip install --no-cache-dir -r /app/requirements.txt - -ENV NAME="chatbot_ner" -ENV DJANGODIR=/app -ENV NUM_WORKERS=4 -ENV DJANGO_SETTINGS_MODULE=chatbot_ner.settings -ENV PORT=8081 -ENV TIMEOUT=600 -ENV DEBIAN_FRONTEND=noninteractive - -# Important change this via .env (the file copied from config.example) -ENV SECRET_KEY=!yqqcz-v@(s@kpygpvomcuu3il0q1&qtpz)e_g0ulo-sdv%c0c - -EXPOSE 8081 - -ADD . 
/app - -# entrypoint/cmd script -CMD /app/docker/cmd.sh diff --git a/docker/cmd.sh b/docker/cmd.sh index 3ef08fc20..26547b4bb 100755 --- a/docker/cmd.sh +++ b/docker/cmd.sh @@ -6,16 +6,8 @@ export DJANGO_SETTINGS_MODULE=$DJANGO_SETTINGS_MODULE export PYTHONPATH=$DJANGODIR:$PYTHONPATH # Initial setup.py - Datastore lines need to be commented for using previously create data - -python /app/nltk_setup.py || { echo 'nltk setup failed'; exit 1; } -sleep 8 +# TODO: Bad design, should not need to comment, move this as a separate step outside of dockerfile +# we sleep to make sure elasticsearch is up by the time we start setting up +sleep 10 python /app/datastore_setup.py || { echo 'datastore setup failed'; exit 1; } - -# Using supervisor as we want to use Nginx and Uwsgi both, Settings specified in supervisord.conf, any update to that will need build - /usr/bin/supervisord - - -# Below parameters can be changed as you wish, values fetched from env variables. You can only run UWSGI by uncommenting the next uwsgi line and commenting above supervisor line -#uwsgi --wsgi-file chatbot_ner/wsgi.py --http :$PORT --workers=$NUM_WORKERS --disable-logging --master --max-requests=$MAX_REQUESTS --harakiri=$TIMEOUT --reload-mercy=120 --worker-reload-mercy=120 --thunder-lock --http-auto-chunked --http-keepalive --vacuum && /usr/sbin/nginx -g 'daemon off;' -#/usr/sbin/nginx -g 'daemon off;' diff --git a/docker/default.site.conf b/docker/default.site.conf new file mode 100644 index 000000000..ebc1ea37b --- /dev/null +++ b/docker/default.site.conf @@ -0,0 +1,22 @@ +# This is the server block. 
This file overwrites the /etc/nginx/sites-available/default that is installed by +# default by nginx +server { + listen 80 default_server; + listen [::]:80 default_server; + error_log /var/log/nginx/error.log; + access_log /var/log/nginx/access.log; + root /app; + location / { + proxy_pass_header Server; + proxy_set_header Host $http_host; + proxy_redirect off; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Scheme $scheme; + real_ip_recursive on; + real_ip_header X-Forwarded-For; + set_real_ip_from 0.0.0.0/0; + proxy_connect_timeout 300; + proxy_read_timeout 300; + proxy_pass http://127.0.0.1:8081/; + } +} \ No newline at end of file diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 71ec3693d..55066beb0 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -1,6 +1,6 @@ # Will start the application, mount local directory, # If you don't want to keep env variables in compose, you can keep an env file that can be copied and source while building the image -# Variables used in the Django app itself can also be configured at chatbot_ner/config (copy config.example to config and edit it) +# Variables used in the Django app itself can also be configured at chatbot_ner/.env (copy .env.example to .env and edit it) # This will also bring up local Elasticsearch, you could even use your already setup ES # I have defined a common network for these 2 services so that containers can communicate with each other @@ -12,8 +12,8 @@ services: chatbot-ner: build: context: .. 
- dockerfile: docker/Dockerfile-python3 -# Vars being used are defined in config.example and used in settings.py + dockerfile: docker/Dockerfile +# Vars being used are defined in .env.example and used in settings.py # ENV vars defined in Dockerfile can be overwritten here before docker-compose up # just add to .env env_file: diff --git a/docker/install.sh b/docker/install.sh deleted file mode 100755 index 721b95540..000000000 --- a/docker/install.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/bash -# Get sample Nginx file for routing and GUI -cd /tmp -wget "https://s3-us-west-2.amazonaws.com/chatbotner/chatbot_ner_nginx/default" -bash -c "cat /tmp/default > /etc/nginx/sites-available/default" -rm -rf /etc/nginx/sites-enabled/default -ln -s /etc/nginx/sites-available/default /etc/nginx/sites-enabled/default -sed -i 's/www-data/root/g' /etc/nginx/nginx.conf diff --git a/docker/nginx.conf b/docker/nginx.conf new file mode 100644 index 000000000..1930b3050 --- /dev/null +++ b/docker/nginx.conf @@ -0,0 +1,27 @@ +# This file goes to /etc/nginx/nginx.conf +user root; +worker_processes auto; +pid /run/nginx.pid; +include /etc/nginx/modules-enabled/*.conf; + +events { + worker_connections 768; + # multi_accept on; +} + +http { + sendfile on; + tcp_nopush on; + tcp_nodelay on; + keepalive_timeout 65; + types_hash_max_size 2048; + include /etc/nginx/mime.types; + default_type application/octet-stream; + ssl_protocols TLSv1 TLSv1.1 TLSv1.2; # Dropping SSLv3, ref: POODLE + ssl_prefer_server_ciphers on; + access_log /var/log/nginx/access.log; + error_log /var/log/nginx/error.log; + gzip on; + include /etc/nginx/conf.d/*.conf; + include /etc/nginx/sites-enabled/*; +} diff --git a/docker/supervisord.conf b/docker/supervisord.conf index 043461263..d39a38153 100644 --- a/docker/supervisord.conf +++ b/docker/supervisord.conf @@ -4,8 +4,8 @@ nodaemon=true # Fill in values from ENV [program:uwsgi] -command=uwsgi --wsgi-file chatbot_ner/wsgi.py --http :%(ENV_PORT)s 
--workers=%(ENV_NUM_WORKERS)s --disable-logging --master --max-requests=%(ENV_MAX_REQUESTS)s --harakiri=%(ENV_TIMEOUT)s --reload-mercy=120 --worker-reload-mercy=120 --thunder-lock --http-auto-chunked --http-keepalive --vacuum -stdout_logfile= /dev/stdout +command=uwsgi --wsgi-file chatbot_ner/wsgi.py --http :%(ENV_PORT)s --strict --need-app --master --workers=%(ENV_NUM_WORKERS)s --threads 2 --enable-threads --disable-logging --log-5xx --log-prefix=uwsgi --log-slow=3000 --logto=/app/logs/ner_log.log --logfile-chmod=644 --max-requests=%(ENV_MAX_REQUESTS)s --harakiri=%(ENV_TIMEOUT)s --reload-mercy=60 --worker-reload-mercy=60 --thunder-lock --http-auto-chunked --http-keepalive --vacuum --single-interpreter --buffer-size=15000 +stdout_logfile=/dev/stdout stdout_logfile_maxbytes=0 stderr_logfile=/dev/stderr stderr_logfile_maxbytes=0 diff --git a/docs/datastore_variables.md b/docs/datastore_variables.md index 7e5fa463c..4fabdf965 100644 --- a/docs/datastore_variables.md +++ b/docs/datastore_variables.md @@ -1,8 +1,8 @@ ## DataStore Environment Variables Description -A `config.example` file is provided at the root of the repository. It is an example file containing all variables that Chatbot NER sets in the environment for future use when connecting to the underlying engine. +A `.env.example` file is provided at the root of the repository. It is an example file containing all variables that Chatbot NER sets in the environment for future use when connecting to the underlying engine. -Copy it to a file called `config` at the root of the repository and edit it to configure your settings. Chatbot NER will read and set variables from this file into the environment. +Copy it to a file called `.env` at the root of the repository and edit it to configure your settings. Chatbot NER will read and set variables from this file into the environment. > **Note**: Do not use quotes while specifying values for variables. Do not put spaces around the '=' sign. 
> @@ -22,7 +22,7 @@ Copy it to a file called `config` at the root of the repository and edit it to c - `ENGINE` - This specifies the which engine to use. In case multiple engines are supported and configured in the `config` file, the settings under the value provided by `ENGINE` are used. In other words, the value of `ENGINE` is used as key to access its connection settings from the constructed dictionary as shown above. + This specifies which engine to use. In case multiple engines are supported and configured in the `.env` file, the settings under the value provided by `ENGINE` are used. In other words, the value of `ENGINE` is used as key to access its connection settings from the constructed dictionary as shown above. At this point only Elasticsearch is supported and valid values for engine are: @@ -62,9 +62,9 @@ Copy it to a file called `config` at the root of the repository and edit it to c -#### Example `config` file +#### Example `.env` file ---------- -Please check [config.example](../config.example) file +Please check [.env.example](../.env.example) file diff --git a/docs/install.md b/docs/install.md index 230698afc..04a7dd158 100644 --- a/docs/install.md +++ b/docs/install.md @@ -51,8 +51,7 @@ Following are the steps to create the Docker image and run NER with Docker. 
```shell git clone https://github.com/hellohaptik/chatbot_ner.git cd chatbot_ner -cp config.example .env # (This will have all the basic environment variables to get started, You can update values accordingly) -cp .env docker/.env +cp .env.example docker/.env # (This will have all the basic environment variables to get started, You can update values accordingly) cd docker docker-compose up --build -d ``` diff --git a/manage.py b/manage.py index 96190a83d..d5dd9287e 100755 --- a/manage.py +++ b/manage.py @@ -1,11 +1,22 @@ #!/usr/bin/env python -from __future__ import absolute_import +"""Django's command-line utility for administrative tasks.""" import os import sys -if __name__ == "__main__": - os.environ.setdefault("DJANGO_SETTINGS_MODULE", "chatbot_ner.settings") - - from django.core.management import execute_from_command_line +def main(): + """Run administrative tasks.""" + os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'chatbot_ner.settings') + try: + from django.core.management import execute_from_command_line + except ImportError as exc: + raise ImportError( + "Couldn't import Django. Are you sure it's installed and " + "available on your PYTHONPATH environment variable? Did you " + "forget to activate a virtual environment?" 
+ ) from exc execute_from_command_line(sys.argv) + + +if __name__ == '__main__': + main() diff --git a/requirements.txt b/requirements.txt index 2847dce00..1f3b8017e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,34 +1,54 @@ -phonenumberslite==8.10.18 -six==1.11.0 -pytz==2014.2 -nltk==3.4.5 -numpy==1.16.5 -elasticsearch==5.5.0 -elastic-apm==5.10.0 -requests==2.20.0 +# Python-C stuff +Cython==0.28.2 +six==1.15.0 + +# WSGI compatible server +uwsgi==2.0.19.1 + +# AWS libs +botocore==1.21.35 +s3transfer==0.5.0 +boto3==1.18.35 + +scipy==1.4.1 +numpy==1.19.2 +pandas==1.0.5 + +urllib3==1.26.5 +requests==2.26.0 requests-aws4auth==0.9 -Django==1.11.29 -django-dotenv==1.4.2 +Django==3.2.7 + +elasticsearch==5.5.3 +nltk==3.4.5 +phonenumberslite==8.10.18 +pyaml==19.4.1 +python-dateutil==2.7.3 +pytz==2020.1 +regex==2020.7.14 weighted-levenshtein==0.1 -regex==2018.7.11 word2number==1.1 -boto==2.49.0 -boto3==1.8.4 -python-dateutil==2.7.3 -pandas==0.21.0 -pyaml==19.4.1 + +# Spacy and models spacy==2.3.2 https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.3.1/en_core_web_sm-2.3.1.tar.gz#egg=en_core_web_sm https://github.com/explosion/spacy-models/releases/download/nl_core_news_sm-2.3.0/nl_core_news_sm-2.3.0.tar.gz#egg=nl_core_news_sm https://github.com/explosion/spacy-models/releases/download/fr_core_news_sm-2.3.0/fr_core_news_sm-2.3.0.tar.gz#egg=fr_core_news_sm https://github.com/explosion/spacy-models/releases/download/de_core_news_sm-2.3.0/de_core_news_sm-2.3.0.tar.gz#egg=de_core_news_sm https://github.com/explosion/spacy-models/releases/download/es_core_news_sm-2.3.1/es_core_news_sm-2.3.1.tar.gz#egg=es_core_news_sm -typing==3.6.2 -flake8==3.4.1 -mock==2.0.0 + +# Alerting and APMs +newrelic==3.4.0.95 +elastic-apm==6.5.0 +sentry-sdk==1.3.0 + +# Tests +django-nose==1.4.7 +mock==3.0.5 coverage==5.5 nose-exclude==0.5.0 -django-nose==1.4.7 -sentry-sdk==0.20.3 + +# Other convenience libs, ideally should only be installed in dev +flake8==3.4.1 
jedi==0.17.2 ipython==7.16.1