Skip to content

Commit

Permalink
Merge pull request #375 from hellohaptik/develop
Browse files Browse the repository at this point in the history
Develop to Master
  • Loading branch information
ankur09011 authored Sep 29, 2020
2 parents e7c3146 + 8e6ecb3 commit 1df0370
Show file tree
Hide file tree
Showing 25 changed files with 2,870 additions and 170 deletions.
17 changes: 17 additions & 0 deletions .github/pull_request_template.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
## JIRA Ticket Number

JIRA TICKET:

## Description of change
(REMOVE ME) Please include a summary of the change and which issue is fixed. Please also include relevant motivation and context. List any dependencies that are required for this change.

## Checklist (OPTIONAL):

- [ ] My code follows the style guidelines of this project
- [ ] I have performed a self-review of my own code
- [ ] I have commented my code, particularly in hard-to-understand areas
- [ ] I have made corresponding changes to the documentation
- [ ] My changes generate no new warnings
- [ ] I have added tests that prove my fix is effective or that my feature works
- [ ] New and existing unit tests pass locally with my changes
- [ ] Any dependent changes have been merged and published in downstream modules
1 change: 1 addition & 0 deletions chatbot_ner/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
url(r'^v2/number/$', api_v2.number),
url(r'^v2/phone_number/$', api_v2.phone_number),
url(r'^v2/number_range/$', api_v2.number_range),
url(r'^v2/text/$', api_v2.text),

# V2 bulk detectors
url(r'^v2/date_bulk/$', api_v2.date),
Expand Down
2 changes: 1 addition & 1 deletion config.example
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ ES_ALIAS=entity_data
ES_INDEX_1=entity_data_v1
ES_INDEX_2=
ES_DOC_TYPE=data_dictionary
ELASTICSEARCH_CRF_DATA_DOC_TYPE=training_dictionary
ELASTICSEARCH_CRF_DATA_INDEX_NAME=entity_examples_data
ELASTICSEARCH_CRF_DATA_DOC_TYPE=training_dictionary

ES_BULK_MSG_SIZE=1000
ES_SEARCH_SIZE=10000
Expand Down
17 changes: 9 additions & 8 deletions datastore/datastore.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,6 @@ def create(self, err_if_exists=True, **kwargs):

if self._engine == ELASTICSEARCH:
es_url = elastic_search.connect.get_es_url()
es_object = elastic_search.transfer.ESTransfer(source=es_url, destination=None)
create_map = [ # TODO: use namedtuples
(True, ELASTICSEARCH_INDEX_1, ELASTICSEARCH_DOC_TYPE, self._store_name,
self._check_doc_type_for_elasticsearch, elastic_search.create.create_entity_index),
Expand Down Expand Up @@ -180,8 +179,10 @@ def create(self, err_if_exists=True, **kwargs):
**kwargs
)
if alias_name:
es_object.point_an_alias_to_index(es_url=es_url, alias_name=self._store_name,
index_name=index_name)
elastic_search.create.create_alias(connection=self._client_or_connection,
index_list=[index_name],
alias_name=alias_name,
logger=ner_logger)

def delete(self, err_if_does_not_exist=True, **kwargs):
"""
Expand All @@ -208,15 +209,15 @@ def delete(self, err_if_does_not_exist=True, **kwargs):
self._connect()

if self._engine == ELASTICSEARCH:
for index_key in [ELASTICSEARCH_INDEX_1, ELASTICSEARCH_INDEX_2, ELASTICSEARCH_CRF_DATA_INDEX_NAME]:
if self._connection_settings.get(index_key):
delete_map = [ELASTICSEARCH_INDEX_1, ELASTICSEARCH_INDEX_2, ELASTICSEARCH_CRF_DATA_INDEX_NAME]
for index_name_key in delete_map:
if self._connection_settings.get(index_name_key):
index_name = self._connection_settings.get(index_name_key)
elastic_search.create.delete_index(connection=self._client_or_connection,
index_name=self._store_name,
index_name=index_name,
logger=ner_logger,
err_if_does_not_exist=err_if_does_not_exist,
**kwargs)
# TODO: cleanup aliases ?

# === Incompatible or deprecated/duplicate APIs

# FIXME: repopulate does not consider language of the variants
Expand Down
81 changes: 70 additions & 11 deletions datastore/elastic_search/create.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,16 @@
import logging
from typing import List, Dict, Any

from elasticsearch import Elasticsearch
from elasticsearch.exceptions import NotFoundError

from .utils import filter_kwargs

log_prefix = 'datastore.elastic_search.create'


def exists(connection, index_name):
# type: (Elasticsearch, str) -> bool
"""
Checks if index_name exists
Expand All @@ -18,13 +25,15 @@ def exists(connection, index_name):


def delete_index(connection, index_name, logger, err_if_does_not_exist=True, **kwargs):
# type: (Elasticsearch, str, logging.Logger, bool, **Any) -> None
"""
Deletes the index named index_name
Args:
connection: Elasticsearch client object
index_name: The name of the index
logger: logging object to log at debug and exception level
err_if_does_not_exist: if to raise error if index does not exist already, defaults to True
kwargs:
body: The configuration for the index (settings and mappings)
master_timeout: Specify timeout for connection to master
Expand All @@ -40,11 +49,17 @@ def delete_index(connection, index_name, logger, err_if_does_not_exist=True, **k
else:
return

try:
delete_alias(connection=connection, index_list=[index_name], alias_name='_all', logger=logger)
except NotFoundError:
logger.warning('No aliases found on on index %s', index_name)

connection.indices.delete(index=index_name, **kwargs)
logger.debug('%s: Delete Index %s: Operation successfully completed', log_prefix, index_name)


def _create_index(connection, index_name, doc_type, logger, mapping_body, err_if_exists=True, **kwargs):
# type: (Elasticsearch, str, str, logging.Logger, Dict[str, Any], bool, **Any) -> None
"""
Creates an Elasticsearch index needed for similarity based searching
Args:
Expand All @@ -53,6 +68,7 @@ def _create_index(connection, index_name, doc_type, logger, mapping_body, err_if
doc_type: The type of the documents that will be indexed
logger: logging object to log at debug and exception level
mapping_body: dict, mappings to put on the index
err_if_exists: if to raise error if the index already exists, defaults to True
kwargs:
master_timeout: Specify timeout for connection to master
timeout: Explicit operation timeout
Expand Down Expand Up @@ -118,6 +134,7 @@ def _create_index(connection, index_name, doc_type, logger, mapping_body, err_if


def create_entity_index(connection, index_name, doc_type, logger, **kwargs):
# type: (Elasticsearch, str, str, logging.Logger, **Any) -> None
"""
Creates an mapping specific to entity storage in elasticsearch and makes a call to create_index
to create the index with the given mapping body
Expand Down Expand Up @@ -145,10 +162,32 @@ def create_entity_index(connection, index_name, doc_type, logger, **kwargs):
mapping_body = {
doc_type: {
'properties': {
'language_script': {
'type': 'text',
'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}},
},
'value': {
'type': 'text',
'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}},
},
'variants': {
'type': 'text',
'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}},
'analyzer': 'my_analyzer',
'norms': {'enabled': False}, # Needed if we want to give longer variants higher scores
},
# other removed/unused fields, kept only for backward compatibility
'dict_type': {
'type': 'text',
'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}},
},
'entity_data': {
'type': 'text',
'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}},
},
'source_language': {
'type': 'text',
'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}},
}
}
}
Expand All @@ -158,6 +197,7 @@ def create_entity_index(connection, index_name, doc_type, logger, **kwargs):


def create_crf_index(connection, index_name, doc_type, logger, **kwargs):
# type: (Elasticsearch, str, str, logging.Logger, **Any) -> None
"""
This method is used to create an index with mapping suited for story training_data
Args:
Expand All @@ -184,17 +224,17 @@ def create_crf_index(connection, index_name, doc_type, logger, **kwargs):
mapping_body = {
doc_type: {
'properties': {
"entity_data": {
"type": "text"
'entity_data': {
'type': 'text'
},
"sentence": {
"enabled": "false"
'sentence': {
'enabled': False
},
"entities": {
"enabled": "false"
'entities': {
'enabled': False
},
"language_script": {
"type": "text"
'language_script': {
'type': 'text'
}
}
}
Expand All @@ -204,17 +244,36 @@ def create_crf_index(connection, index_name, doc_type, logger, **kwargs):


def create_alias(connection, index_list, alias_name, logger, **kwargs):
    # type: (Elasticsearch, List[str], str, logging.Logger, **Any) -> None
    """
    This method is used to create alias for list of indices

    Args:
        connection: Elasticsearch client object
        index_list (list): List of indices the alias has to point to
        alias_name (str): Name of the alias
        logger: logging object to log at debug and exception level
        **kwargs:
            https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-aliases.html
    """
    # Lazy %-style logger arguments: the message is only formatted when DEBUG
    # is enabled. This also removes the broken `'... %s ... %s' % alias_name`
    # call (two placeholders, one non-tuple argument -> TypeError at runtime).
    logger.debug('Putting alias %s to indices: %s', alias_name, str(index_list))
    connection.indices.put_alias(index=index_list, name=alias_name, **kwargs)
    logger.debug('Alias %s now points to indices %s', alias_name, str(index_list))


def delete_alias(connection, index_list, alias_name, logger, **kwargs):
    # type: (Elasticsearch, List[str], str, logging.Logger, **Any) -> None
    """
    Remove the alias named `alias_name` from each index in `index_list`

    Args:
        connection: Elasticsearch client object
        index_list (list): indices the alias should be removed from
        alias_name (str): name of the alias to delete
        logger: logging object to log at debug and exception level
        **kwargs:
            https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-aliases.html
    """
    indices_repr = str(index_list)
    logger.debug('Removing alias %s from indices: %s', alias_name, indices_repr)
    connection.indices.delete_alias(index=index_list, name=alias_name, **kwargs)
    logger.debug('Alias %s removed from indices %s', alias_name, indices_repr)
5 changes: 5 additions & 0 deletions ner_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@

ENTITY_VALUE_DICT_KEY = 'value'

# Output-dict key whose value flags that a detected entity value was
# verified against (found in) the datastore
DATASTORE_VERIFIED = 'datastore_verified'
# Output-dict key whose value flags that a detected entity value came from
# the model — presumably the CRF/ML detector; confirm against detector code
MODEL_VERIFIED = 'model_verified'

# ************************ constants tell us what to do with structured_value ************************
# This will execute entity detection on the structured_value.
STRUCTURED = 0
Expand Down
Loading

0 comments on commit 1df0370

Please sign in to comment.